From 9c712486a17b1af0c15ba1758f1d7aa0f9fceb96 Mon Sep 17 00:00:00 2001 From: Jason Volk Date: Sun, 1 Oct 2017 21:14:34 -0700 Subject: [PATCH] ircd: Split up lexical.h; comments; cleanup. --- include/ircd/lex_cast.h | 265 ++++++++++ include/ircd/stdinc.h | 4 +- include/ircd/{lexical.h => stringops.h} | 612 ++++++------------------ include/ircd/tokens.h | 205 ++++++++ ircd/lexical.cc | 143 +++--- 5 files changed, 710 insertions(+), 519 deletions(-) create mode 100644 include/ircd/lex_cast.h rename include/ircd/{lexical.h => stringops.h} (53%) create mode 100644 include/ircd/tokens.h diff --git a/include/ircd/lex_cast.h b/include/ircd/lex_cast.h new file mode 100644 index 000000000..52c15fce3 --- /dev/null +++ b/include/ircd/lex_cast.h @@ -0,0 +1,265 @@ +/* + * charybdis: an advanced ircd. + * inline/stringops.h: inlined string operations used in a few places + * + * Copyright (C) 2005-2016 Charybdis Development Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + */ + +#pragma once +#define HAVE_IRCD_LEX_CAST_H + +// +// Lexical conversions +// +namespace ircd +{ + IRCD_EXCEPTION_HIDENAME(ircd::error, bad_lex_cast) + + template bool try_lex_cast(const string_view &); + + template T lex_cast(std::string &); + template T lex_cast(const std::string &); + template T lex_cast(const std::string_view &); + template T lex_cast(const string_view &); + + // User supplied destination buffer + template string_view lex_cast(T, char *const &buf, const size_t &max); + + // Circular static thread_local buffer + const size_t LEX_CAST_BUFS {256}; // plenty + template string_view lex_cast(const T &t); + + // Binary <-> Hex conversion suite + string_view u2a(const mutable_buffer &out, const const_raw_buffer &in); + const_raw_buffer a2u(const mutable_raw_buffer &out, const const_buffer &in); + + // Binary <-> Base64 conversion suite + string_view b64encode(const mutable_buffer &out, const const_raw_buffer &in); +} + +namespace ircd +{ + template<> bool try_lex_cast(const string_view &); // stub always true + template<> bool try_lex_cast(const string_view &); // stub always true + template<> bool try_lex_cast(const string_view &); // stub always true + template<> bool try_lex_cast(const string_view &); + template<> bool try_lex_cast(const string_view &); + template<> bool try_lex_cast(const string_view &); + template<> bool try_lex_cast(const string_view &); + template<> bool try_lex_cast(const string_view &); + template<> bool try_lex_cast(const string_view &); + template<> bool try_lex_cast(const string_view &); + template<> bool try_lex_cast(const string_view &); + template<> bool try_lex_cast(const string_view &); + template<> bool try_lex_cast(const string_view &); + template<> bool try_lex_cast(const string_view &); + + 
template<> std::string &lex_cast(std::string &); // trivial + template<> std::string lex_cast(const std::string &); // trivial + template<> std::string_view lex_cast(const std::string_view &); // trivial + template<> std::string lex_cast(const string_view &); // trivial + template<> long double lex_cast(const string_view &); + template<> double lex_cast(const string_view &); + template<> ulong lex_cast(const string_view &); + template<> long lex_cast(const string_view &); + template<> uint lex_cast(const string_view &); + template<> int lex_cast(const string_view &); + template<> ushort lex_cast(const string_view &); + template<> short lex_cast(const string_view &); + template<> uint8_t lex_cast(const string_view &); + template<> int8_t lex_cast(const string_view &); + template<> bool lex_cast(const string_view &); + + template<> string_view lex_cast(const std::string &, char *const &buf, const size_t &max); + template<> string_view lex_cast(const std::string_view &, char *const &buf, const size_t &max); + template<> string_view lex_cast(const string_view &, char *const &buf, const size_t &max); + template<> string_view lex_cast(long double, char *const &buf, const size_t &max); + template<> string_view lex_cast(double, char *const &buf, const size_t &max); + template<> string_view lex_cast(ulong, char *const &buf, const size_t &max); + template<> string_view lex_cast(long, char *const &buf, const size_t &max); + template<> string_view lex_cast(uint, char *const &buf, const size_t &max); + template<> string_view lex_cast(int, char *const &buf, const size_t &max); + template<> string_view lex_cast(ushort, char *const &buf, const size_t &max); + template<> string_view lex_cast(short, char *const &buf, const size_t &max); + template<> string_view lex_cast(uint8_t, char *const &buf, const size_t &max); + template<> string_view lex_cast(int8_t, char *const &buf, const size_t &max); + template<> string_view lex_cast(bool, char *const &buf, const size_t &max); +} + +/// 
Convert a native number to a string. The returned value is a view of the +/// string in a static ring buffer. There are LEX_CAST_BUFS number of buffers +/// so you should not hold on to the returned view for very long. +template +ircd::string_view +ircd::lex_cast(const T &t) +{ + return lex_cast(t, nullptr, 0); +} + +/// Conversion to an std::string creates a copy when the input is a +/// string_view. Note this is not considered an "unnecessary lexical cast" +/// even though nothing is being converted, so there will be no warning. +template<> +inline std::string +ircd::lex_cast(const string_view &s) +{ + return std::string{s}; +} + +/// Template basis for a string_view input +template +T +ircd::lex_cast(const string_view &s) +{ + return s; +} + +/// Specialization of a string_view to string_view conversion which is just +/// a trivial copy of the view. +template<> +inline std::string_view +ircd::lex_cast(const std::string_view &s) +{ + return s; +} + +/// Specialization of a string to string conversion which generates a warning +/// because the conversion has to copy the string while no numerical conversion +/// has taken place. The developer should remove the offending lex_cast. +template<> +__attribute__((warning("unnecessary lexical cast"))) +inline std::string +ircd::lex_cast(const std::string &s) +{ + return s; +} + +/// Template basis for a const std::string input +template +T +ircd::lex_cast(const std::string &s) +{ + return lex_cast(string_view{s}); +} + +/// Template basis for an lvalue string. If we can get this binding rather +/// than the const std::string alternative some trivial conversions are +/// easier to make in the specializations. +template +T +ircd::lex_cast(std::string &s) +{ + return lex_cast(string_view{s}); +} + +/// Specialization of a string to string conversion without a warning because +/// we can trivially pass through a reference from input to output. 
+template<> +inline std::string & +ircd::lex_cast(std::string &s) +{ + return s; +} + +/// Specialization of a string to string conversion to user's buffer; +/// marked as unnecessary because no numerical conversion takes place yet +/// data is still copied. (note: warning may be removed; may be intentional) +template<> +__attribute__((warning("unnecessary lexical cast"))) +inline ircd::string_view +ircd::lex_cast(const string_view &s, + char *const &buf, + const size_t &max) +{ + s.copy(buf, max); + return { buf, max }; +} + +/// Specialization of a string to string conversion to user's buffer; +/// marked as unnecessary because no numerical conversion takes place yet +/// data is still copied. (note: warning may be removed; may be intentional) +template<> +__attribute__((warning("unnecessary lexical cast"))) +inline ircd::string_view +ircd::lex_cast(const std::string_view &s, + char *const &buf, + const size_t &max) +{ + s.copy(buf, max); + return { buf, max }; +} + +/// Specialization of a string to string conversion to user's buffer; +/// marked as unnecessary because no numerical conversion takes place yet +/// data is still copied. 
(note: warning may be removed; may be intentional) +template<> +__attribute__((warning("unnecessary lexical cast"))) +inline ircd::string_view +ircd::lex_cast(const std::string &s, + char *const &buf, + const size_t &max) +{ + s.copy(buf, max); + return { buf, max }; +} + +/// Template basis; if no specialization is matched there is no fallback here +template +__attribute__((error("unsupported lexical cast"))) +ircd::string_view +ircd::lex_cast(T t, + char *const &buf, + const size_t &max) +{ + assert(0); + return {}; +} + +/// Template basis; if no specialization is matched there is no fallback here +template +__attribute__((error("unsupported lexical cast"))) +bool +ircd::try_lex_cast(const string_view &s) +{ + assert(0); + return false; +} + +/// Trivial conversion; always returns true +template<> +inline bool +ircd::try_lex_cast(const string_view &) +{ + return true; +} + +/// Trivial conversion; always returns true +template<> +inline bool +ircd::try_lex_cast(const string_view &) +{ + return true; +} + +/// Trivial conversion; always returns true +template<> +inline bool +ircd::try_lex_cast(const string_view &s) +{ + return true; +} diff --git a/include/ircd/stdinc.h b/include/ircd/stdinc.h index 6c09810c4..91611a12a 100644 --- a/include/ircd/stdinc.h +++ b/include/ircd/stdinc.h @@ -192,7 +192,9 @@ namespace ircd #include "localee.h" #include "life_guard.h" #include "color.h" -#include "lexical.h" +#include "lex_cast.h" +#include "stringops.h" +#include "tokens.h" #include "params.h" #include "iov.h" #include "parse.h" diff --git a/include/ircd/lexical.h b/include/ircd/stringops.h similarity index 53% rename from include/ircd/lexical.h rename to include/ircd/stringops.h index 12610b5ab..9836e0467 100644 --- a/include/ircd/lexical.h +++ b/include/ircd/stringops.h @@ -2,7 +2,7 @@ * charybdis: an advanced ircd. 
* inline/stringops.h: inlined string operations used in a few places * - * Copyright (C) 2005-2016 Charybdis Development Team + * Copyright (C) 2005-2017 Charybdis Development Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,154 +21,13 @@ */ #pragma once -#define HAVE_IRCD_LEXICAL_H +#define HAVE_IRCD_STRING_H // -// Lexical conversions +// Misc string utilities // namespace ircd { - IRCD_EXCEPTION_HIDENAME(ircd::error, bad_lex_cast) - - template bool try_lex_cast(const string_view &); - template<> bool try_lex_cast(const string_view &); // stub always true - template<> bool try_lex_cast(const string_view &); // stub always true - template<> bool try_lex_cast(const string_view &); // stub always true - template<> bool try_lex_cast(const string_view &); - template<> bool try_lex_cast(const string_view &); - template<> bool try_lex_cast(const string_view &); - template<> bool try_lex_cast(const string_view &); - template<> bool try_lex_cast(const string_view &); - template<> bool try_lex_cast(const string_view &); - template<> bool try_lex_cast(const string_view &); - template<> bool try_lex_cast(const string_view &); - template<> bool try_lex_cast(const string_view &); - template<> bool try_lex_cast(const string_view &); - template<> bool try_lex_cast(const string_view &); - - template T lex_cast(std::string &); - template T lex_cast(const std::string &); - template T lex_cast(const std::string_view &); - template T lex_cast(const string_view &); - template<> std::string &lex_cast(std::string &); // trivial - template<> std::string lex_cast(const std::string &); // trivial - template<> std::string_view lex_cast(const std::string_view &); // trivial - template<> std::string lex_cast(const string_view &); // trivial - template<> long double lex_cast(const string_view &); - template<> double lex_cast(const string_view &); - template<> ulong lex_cast(const 
string_view &); - template<> long lex_cast(const string_view &); - template<> uint lex_cast(const string_view &); - template<> int lex_cast(const string_view &); - template<> ushort lex_cast(const string_view &); - template<> short lex_cast(const string_view &); - template<> uint8_t lex_cast(const string_view &); - template<> int8_t lex_cast(const string_view &); - template<> bool lex_cast(const string_view &); - - // User supplied destination buffer - template string_view lex_cast(T, char *const &buf, const size_t &max); - template<> string_view lex_cast(const std::string &, char *const &buf, const size_t &max); - template<> string_view lex_cast(const std::string_view &, char *const &buf, const size_t &max); - template<> string_view lex_cast(const string_view &, char *const &buf, const size_t &max); - template<> string_view lex_cast(long double, char *const &buf, const size_t &max); - template<> string_view lex_cast(double, char *const &buf, const size_t &max); - template<> string_view lex_cast(ulong, char *const &buf, const size_t &max); - template<> string_view lex_cast(long, char *const &buf, const size_t &max); - template<> string_view lex_cast(uint, char *const &buf, const size_t &max); - template<> string_view lex_cast(int, char *const &buf, const size_t &max); - template<> string_view lex_cast(ushort, char *const &buf, const size_t &max); - template<> string_view lex_cast(short, char *const &buf, const size_t &max); - template<> string_view lex_cast(uint8_t, char *const &buf, const size_t &max); - template<> string_view lex_cast(int8_t, char *const &buf, const size_t &max); - template<> string_view lex_cast(bool, char *const &buf, const size_t &max); - - // Circular static thread_local buffer - const size_t LEX_CAST_BUFS {256}; // plenty - template string_view lex_cast(const T &t); - - // - // Binary / Hex / Base64 conversion suite - // - - string_view u2a(const mutable_buffer &out, const const_raw_buffer &in); - const_raw_buffer a2u(const 
mutable_raw_buffer &out, const const_buffer &in); - string_view b64encode(const mutable_buffer &out, const const_raw_buffer &in); - - // - // String tokenization. - // - - // Use the closure for best performance. Note that string_view's - // are not required to be null terminated. Construct an std::string from the view to allocate - // and copy the token with null termination. - using token_view = std::function; - void tokens(const string_view &str, const char &sep, const token_view &); - void tokens(const string_view &str, const char *const &sep, const token_view &); - size_t tokens(const string_view &str, const char &sep, const size_t &limit, const token_view &); - size_t tokens(const string_view &str, const char *const &sep, const size_t &limit, const token_view &); - - // Copies tokens into your buffer and null terminates strtok() style. Returns BYTES of buf consumed. - size_t tokens(const string_view &str, const char &sep, char *const &buf, const size_t &max, const token_view &); - size_t tokens(const string_view &str, const char *const &sep, char *const &buf, const size_t &max, const token_view &); - - // Receive token view into iterator range - template it tokens(const string_view &str, const sep &, const it &b, const it &e); - - // Receive token view into array - template size_t tokens(const string_view &str, const sep &, string_view (&buf)[N]); - template size_t tokens(const string_view &str, const sep &, std::array &); - - // Receive token view into new container (custom allocator) - template - class C, //= std::vector, - class T = string_view, - class A, - class sep> - C tokens(A&& allocator, const string_view &str, const sep &); - - // Receive token view into new container - template - class C, //= std::vector, - class T = string_view, - class A = std::allocator, - class sep> - C tokens(const string_view &str, const sep &); - - // Receive token view into new associative container (custom allocator) - template - class C, - class T = string_view, - class 
Comp = std::less, - class A, - class sep> - C tokens(A&& allocator, const string_view &str, const sep &); - - // Receive token view into new associative container - template - class C, - class T = string_view, - class Comp = std::less, - class A = std::allocator, - class sep> - C tokens(const string_view &str, const sep &); - - // Convenience to get individual tokens - size_t tokens_count(const string_view &str, const char &sep); - size_t tokens_count(const string_view &str, const char *const &sep); - string_view token(const string_view &str, const char &sep, const size_t &at); - string_view token(const string_view &str, const char *const &sep, const size_t &at); - string_view token_last(const string_view &str, const char &sep); - string_view token_last(const string_view &str, const char *const &sep); - string_view token_first(const string_view &str, const char &sep); - string_view token_first(const string_view &str, const char *const &sep); - string_view tokens_after(const string_view &str, const char &sep, const size_t &at); - string_view tokens_after(const string_view &str, const char *const &sep, const size_t &at); - - // - // Misc utils - // - // Simple case insensitive comparison convenience utils struct iless; struct igreater; @@ -180,39 +39,66 @@ namespace ircd size_t strlcpy(char *const &dest, const string_view &src, const size_t &bufmax); size_t strlcat(char *const &dest, const string_view &src, const size_t &bufmax); + // return view without trailing c + string_view rstrip(const string_view &str, const char &c = ' '); + string_view rstrip(const string_view &str, const string_view &c); + + // return view without leading c + string_view lstrip(const string_view &str, const char &c = ' '); + string_view lstrip(const string_view &str, const string_view &c); + + // return view without leading and trailing c + string_view strip(const string_view &str, const char &c = ' '); + string_view strip(const string_view &str, const string_view &c); + + // split view on 
first match of delim; delim not included; no delim .second empty + std::pair split(const string_view &str, const char &delim = ' '); + std::pair split(const string_view &str, const string_view &delim); + + // split view on last match of delim; delim not included; no delim .second empty + std::pair rsplit(const string_view &str, const char &delim = ' '); + std::pair rsplit(const string_view &str, const string_view &delim); + + // view between first match of delim a and first match of delim b after it + string_view between(const string_view &str, const string_view &a, const string_view &b); + string_view between(const string_view &str, const char &a = '(', const char &b = ')'); + + // test string endswith delim; or any of the delims in iterable + bool endswith(const string_view &str, const string_view &val); + bool endswith(const string_view &str, const char &val); + template bool endswith_any(const string_view &str, const It &begin, const It &end); + + // test string startswith delim; or any of the delims in iterable + bool startswith(const string_view &str, const string_view &val); + bool startswith(const string_view &str, const char &val); + template bool startswith_any(const string_view &str, const It &begin, const It &end); + + // test string is surrounded by val (ex. 
surrounded by quote characters) + bool surrounds(const string_view &str, const string_view &val); + bool surrounds(const string_view &str, const char &val); + + // pop trailing char from view + char chop(string_view &str); + + // remove trailing from view and return num chars removed + size_t chomp(string_view &str, const char &c = '\n'); + size_t chomp(string_view &str, const string_view &c); + template size_t chomp(iterators, const delim &d); + + // Convenience to strip quotes + string_view unquote(const string_view &str); + std::string unquote(std::string &&); + // Legacy char *strip_colour(char *string); char *strip_unprintable(char *string); char *reconstruct_parv(int parc, const char **parv); - - char chop(string_view &str); - size_t chomp(string_view &str, const char &c = '\n'); - size_t chomp(string_view &str, const string_view &c); - template size_t chomp(iterators, const delim &d); - string_view rstrip(const string_view &str, const char &c = ' '); - string_view rstrip(const string_view &str, const string_view &c); - string_view lstrip(const string_view &str, const char &c = ' '); - string_view lstrip(const string_view &str, const string_view &c); - string_view strip(const string_view &str, const char &c = ' '); - string_view strip(const string_view &str, const string_view &c); - std::pair split(const string_view &str, const char &delim = ' '); - std::pair split(const string_view &str, const string_view &delim); - std::pair rsplit(const string_view &str, const char &delim = ' '); - std::pair rsplit(const string_view &str, const string_view &delim); - string_view between(const string_view &str, const string_view &a, const string_view &b); - string_view between(const string_view &str, const char &a = '(', const char &b = ')'); - bool endswith(const string_view &str, const string_view &val); - bool endswith(const string_view &str, const char &val); - template bool endswith_any(const string_view &str, const It &begin, const It &end); - bool startswith(const 
string_view &str, const string_view &val); - bool startswith(const string_view &str, const char &val); - template bool startswith_any(const string_view &str, const It &begin, const It &end); - bool surrounds(const string_view &str, const string_view &val); - bool surrounds(const string_view &str, const char &val); - string_view unquote(string_view str); std::string unquote(std::string &&); } +/// Remove quotes on an std::string. Only operates on an rvalue reference so +/// that a copy of the string is not created when no quotes are found, and +/// movements can take place if they are. This overload is not needed often; +/// use string_view. inline std::string ircd::unquote(std::string &&str) { @@ -225,18 +111,71 @@ ircd::unquote(std::string &&str) return std::move(str); } +/// Common convenience to remove quotes around the view of the string inline ircd::string_view -ircd::unquote(string_view str) +ircd::unquote(const string_view &str) { - if(startswith(str, '"')) - str = { str.data() + 1, str.data() + str.size() }; - - if(endswith(str, '"')) - str = { str.data(), str.data() + str.size() - 1 }; - - return str; + return strip(str, '"'); } +/// Chomps delim from all of the string views in the iterable (iterators are +/// the T::iterator pair {begin(t), end(t)} of an iterable T) and returns the +/// total number of characters removed from all operations. +template +size_t +ircd::chomp(iterators its, + const delim &d) +{ + return std::accumulate(begin(its), end(its), size_t(0), [&d] + (auto ret, const auto &s) + { + return ret += chomp(s, d); + }); +} + +/// Removes all characters from the end of the view starting with the last +/// instance of c. Different from rstrip() in that this will remove more than +/// just the delim from the end; it removes both the delim and everything after +/// it from wherever the last delim may be. Removes nothing if no delim is found.
+inline size_t +ircd::chomp(string_view &str, + const char &c) +{ + const auto pos(str.find_last_of(c)); + if(pos == string_view::npos) + return 0; + + assert(str.size() - pos == 1); + str = str.substr(0, pos); + return 1; +} + +/// Removes all characters from the end of the view starting with the last +/// instance of c. This matches the entire delim string c to chomp it and +/// everything after it. +inline size_t +ircd::chomp(string_view &str, + const string_view &c) +{ + const auto pos(str.find_last_of(c)); + if(pos == string_view::npos) + return 0; + + assert(str.size() - pos == c.size()); + str = str.substr(0, pos); + return c.size(); +} + +/// Removes any last character from the view, modifying the view, and returning +/// that character. +inline char +ircd::chop(string_view &str) +{ + return !str.empty()? str.pop_back() : '\0'; +} + +/// Test if a string starts and ends with character inline bool ircd::surrounds(const string_view &str, const char &val) @@ -244,6 +183,7 @@ ircd::surrounds(const string_view &str, return str.size() >= 2 && str.front() == val && str.back() == val; } +/// Test if a string starts and ends with a string inline bool ircd::surrounds(const string_view &str, const string_view &val) @@ -251,6 +191,7 @@ ircd::surrounds(const string_view &str, return startswith(str, val) && endswith(str, val); } +/// Test if a string starts with any of the values in the iterable template bool ircd::startswith_any(const string_view &str, @@ -263,6 +204,7 @@ ircd::startswith_any(const string_view &str, }); } +/// Test if a string starts with a character inline bool ircd::startswith(const string_view &str, const char &val) @@ -270,6 +212,7 @@ ircd::startswith(const string_view &str, return !str.empty() && str[0] == val; } +/// Test if a string starts with a string inline bool ircd::startswith(const string_view &str, const string_view &val) @@ -278,6 +221,7 @@ ircd::startswith(const string_view &str, return pos == 0; } +/// Test if a string ends with any of 
the values in iterable template bool ircd::endswith_any(const string_view &str, @@ -290,6 +234,7 @@ ircd::endswith_any(const string_view &str, }); } +/// Test if a string ends with character inline bool ircd::endswith(const string_view &str, const char &val) @@ -297,6 +242,7 @@ ircd::endswith(const string_view &str, return !str.empty() && str[str.size()-1] == val; } +/// Test if a string ends with a string inline bool ircd::endswith(const string_view &str, const string_view &val) @@ -306,6 +252,8 @@ ircd::endswith(const string_view &str, return pos == str.size() - vlen; } +/// View a string between the first match of a and the first match of b +/// after a. inline ircd::string_view ircd::between(const string_view &str, const string_view &a, @@ -314,6 +262,8 @@ ircd::between(const string_view &str, return split(split(str, a).second, b).first; } +/// View a string between the first match of a and the first match of b +/// after a. inline ircd::string_view ircd::between(const string_view &str, const char &a, @@ -322,6 +272,8 @@ ircd::between(const string_view &str, return split(split(str, a).second, b).first; } +/// Split a string on the last match of delim. Delim not included; no match +/// will return original str in pair.first, pair.second empty. inline std::pair ircd::rsplit(const string_view &str, const string_view &delim) @@ -339,6 +291,8 @@ ircd::rsplit(const string_view &str, }; } +/// Split a string on the last match of delim. Delim not included; no match +/// will return original str in pair.first, pair.second empty. inline std::pair ircd::rsplit(const string_view &str, const char &delim) @@ -356,6 +310,8 @@ ircd::rsplit(const string_view &str, }; } +/// Split a string on the first match of delim. Delim not included; no match +/// will return original str in pair.first, pair.second empty. 
inline std::pair ircd::split(const string_view &str, const string_view &delim) @@ -373,6 +329,8 @@ ircd::split(const string_view &str, }; } +/// Split a string on the first match of delim. Delim not included; no match +/// will return original str in pair.first, pair.second empty. inline std::pair ircd::split(const string_view &str, const char &delim) @@ -390,6 +348,7 @@ ircd::split(const string_view &str, }; } +/// Remove leading and trailing instances of c from the returned view inline ircd::string_view ircd::strip(const string_view &str, const string_view &c) @@ -397,6 +356,7 @@ ircd::strip(const string_view &str, return lstrip(rstrip(str, c), c); } +/// Remove leading and trailing instances of c from the returned view inline ircd::string_view ircd::strip(const string_view &str, const char &c) @@ -404,6 +364,7 @@ ircd::strip(const string_view &str, return lstrip(rstrip(str, c), c); } +/// Remove trailing instances of c from the returned view inline ircd::string_view ircd::rstrip(const string_view &str, const string_view &c) @@ -412,6 +373,7 @@ ircd::rstrip(const string_view &str, return pos != string_view::npos? string_view{str.substr(0, pos + 1)} : str; } +/// Remove trailing instances of c from the returned view inline ircd::string_view ircd::rstrip(const string_view &str, const char &c) @@ -420,6 +382,7 @@ ircd::rstrip(const string_view &str, return pos != string_view::npos? string_view{str.substr(0, pos + 1)} : str; } +/// Remove leading instances of c from the returned view inline ircd::string_view ircd::lstrip(const string_view &str, const char &c) @@ -428,6 +391,7 @@ ircd::lstrip(const string_view &str, return pos != string_view::npos? string_view{str.substr(pos)} : string_view{}; } +/// Remove leading instances of c from the returned view inline ircd::string_view ircd::lstrip(const string_view &str, const string_view &c) @@ -436,160 +400,7 @@ ircd::lstrip(const string_view &str, return pos != string_view::npos? 
string_view{str.substr(pos)} : string_view{}; } -template -size_t -ircd::chomp(iterators its, - const delim &d) -{ - return std::accumulate(begin(its), end(its), size_t(0), [&d] - (auto ret, const auto &s) - { - return ret += chomp(s, d); - }); -} - -inline size_t -ircd::chomp(string_view &str, - const char &c) -{ - const auto pos(str.find_last_of(c)); - if(pos == string_view::npos) - return 0; - - assert(str.size() - pos == 1); - str = str.substr(0, pos); - return 1; -} - -inline size_t -ircd::chomp(string_view &str, - const string_view &c) -{ - const auto pos(str.find_last_of(c)); - if(pos == string_view::npos) - return 0; - - assert(str.size() - pos == c.size()); - str = str.substr(0, pos); - return c.size(); -} - -inline char -ircd::chop(string_view &str) -{ - return !str.empty()? str.pop_back() : '\0'; -} - -template -size_t -ircd::tokens(const string_view &str, - const delim &sep, - string_view (&buf)[N]) -{ - const auto e(tokens(str, sep, begin(buf), end(buf))); - return std::distance(begin(buf), e); -} - -template -size_t -ircd::tokens(const string_view &str, - const delim &sep, - std::array &buf) -{ - const auto e(tokens(str, sep, begin(buf), end(buf))); - return std::distance(begin(buf), e); -} - -template -it -ircd::tokens(const string_view &str, - const delim &sep, - const it &b, - const it &e) -{ - it pos(b); - tokens(str, sep, std::distance(b, e), [&pos] - (const string_view &token) - { - *pos = token; - ++pos; - }); - - return pos; -} - -template - class C, - class T, - class Comp, - class A, - class delim> -C -ircd::tokens(const string_view &str, - const delim &sep) -{ - A allocator; - return tokens(allocator, str, sep); -} - -template - class C, - class T, - class Comp, - class A, - class delim> -C -ircd::tokens(A&& allocator, - const string_view &str, - const delim &sep) -{ - C ret(std::forward(allocator)); - tokens(str, sep, [&ret] - (const string_view &token) - { - ret.emplace(ret.end(), token); - }); - - return ret; -} - -template - class C, - 
class T, - class A, - class delim> -C -ircd::tokens(const string_view &str, - const delim &sep) -{ - A allocator; - return tokens(allocator, str, sep); -} - -template - class C, - class T, - class A, - class delim> -C -ircd::tokens(A&& allocator, - const string_view &str, - const delim &sep) -{ - C ret(std::forward(allocator)); - tokens(str, sep, [&ret] - (const string_view &token) - { - ret.emplace(ret.end(), token); - }); - - return ret; -} - +/// Copy a string to dst with guaranteed null terminated output inline size_t ircd::strlcpy(char *const &dst, const string_view &src, @@ -626,6 +437,8 @@ ircd::strlcpy(char *const &dst, } #endif +/// Append a string to dst with guaranteed null terminated output; Expects +/// dst to have null termination before calling this function. inline size_t ircd::strlcat(char *const &dst, const string_view &src, @@ -655,6 +468,8 @@ ircd::strlcat(char *const &dst, } #endif +/// Case insensitive string comparison deciding which string compares 'less' +/// than the other. struct ircd::iless { using is_transparent = std::true_type; @@ -713,6 +528,7 @@ const }); } +/// Case insensitive string comparison deciding if two strings are equal struct ircd::iequals { using is_transparent = std::true_type; @@ -771,6 +587,8 @@ const }); } +/// Case insensitive string comparison deciding which string compares 'greater' +/// than the other.
struct ircd::igreater { using is_transparent = std::true_type; @@ -828,131 +646,3 @@ const return tolower(a) > tolower(b); }); } - -template -ircd::string_view -ircd::lex_cast(const T &t) -{ - return lex_cast(t, nullptr, 0); -} - -template<> -inline std::string -ircd::lex_cast(const string_view &s) -{ - return std::string{s}; -} - -template -T -ircd::lex_cast(const string_view &s) -{ - return s; -} - -template<> -inline std::string_view -ircd::lex_cast(const std::string_view &s) -{ - return s; -} - -template<> -__attribute__((warning("unnecessary lexical cast"))) -inline std::string -ircd::lex_cast(const std::string &s) -{ - return s; -} - -template -T -ircd::lex_cast(const std::string &s) -{ - return lex_cast(string_view{s}); -} - -template<> -inline std::string & -ircd::lex_cast(std::string &s) -{ - return s; -} - -template -T -ircd::lex_cast(std::string &s) -{ - return lex_cast(string_view{s}); -} - -template<> -inline ircd::string_view -ircd::lex_cast(const string_view &s, - char *const &buf, - const size_t &max) -{ - s.copy(buf, max); - return { buf, max }; -} - -template<> -inline ircd::string_view -ircd::lex_cast(const std::string_view &s, - char *const &buf, - const size_t &max) -{ - s.copy(buf, max); - return { buf, max }; -} - -template<> -inline ircd::string_view -ircd::lex_cast(const std::string &s, - char *const &buf, - const size_t &max) -{ - s.copy(buf, max); - return { buf, max }; -} - -template -__attribute__((error("unsupported lexical cast"))) -ircd::string_view -ircd::lex_cast(T t, - char *const &buf, - const size_t &max) -{ - assert(0); - return {}; -} - -template<> -inline bool -ircd::try_lex_cast(const string_view &) -{ - return true; -} - -template<> -inline bool -ircd::try_lex_cast(const string_view &) -{ - return true; -} - -template<> -inline bool -ircd::try_lex_cast(const string_view &s) -{ - return true; -} - -template -__attribute__((error("unsupported lexical cast"))) -bool -ircd::try_lex_cast(const string_view &s) -{ - assert(0); - 
return false; -} diff --git a/include/ircd/tokens.h b/include/ircd/tokens.h new file mode 100644 index 000000000..f70d7e8d7 --- /dev/null +++ b/include/ircd/tokens.h @@ -0,0 +1,205 @@ +/* + * charybdis: an advanced ircd. + * inline/stringops.h: inlined string operations used in a few places + * + * Copyright (C) 2005-2016 Charybdis Development Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + */ + +#pragma once +#define HAVE_IRCD_TOKENS_H + +// +// String tokenization utils +// +namespace ircd +{ + // Use the closure for best performance. Note that string_view's are not + // required to be null terminated. Construct an std::string from the view + // to allocate and copy the token with null termination. + using token_view = std::function; + void tokens(const string_view &str, const char &sep, const token_view &); + void tokens(const string_view &str, const char *const &sep, const token_view &); + size_t tokens(const string_view &str, const char &sep, const size_t &limit, const token_view &); + size_t tokens(const string_view &str, const char *const &sep, const size_t &limit, const token_view &); + + // Copies tokens into your buffer and null terminates strtok() style. Returns BYTES of buf consumed. 
+ size_t tokens(const string_view &str, const char &sep, char *const &buf, const size_t &max, const token_view &); + size_t tokens(const string_view &str, const char *const &sep, char *const &buf, const size_t &max, const token_view &); + + // Receive token view into iterator range + template it tokens(const string_view &str, const sep &, const it &b, const it &e); + + // Receive token view into array + template size_t tokens(const string_view &str, const sep &, string_view (&buf)[N]); + template size_t tokens(const string_view &str, const sep &, std::array &); + + // Receive token view into new container (custom allocator) + template + class C, //= std::vector, + class T = string_view, + class A, + class sep> + C tokens(A&& allocator, const string_view &str, const sep &); + + // Receive token view into new container + template + class C, //= std::vector, + class T = string_view, + class A = std::allocator, + class sep> + C tokens(const string_view &str, const sep &); + + // Receive token view into new associative container (custom allocator) + template + class C, + class T = string_view, + class Comp = std::less, + class A, + class sep> + C tokens(A&& allocator, const string_view &str, const sep &); + + // Receive token view into new associative container + template + class C, + class T = string_view, + class Comp = std::less, + class A = std::allocator, + class sep> + C tokens(const string_view &str, const sep &); + + // Convenience to get individual tokens + size_t tokens_count(const string_view &str, const char &sep); + size_t tokens_count(const string_view &str, const char *const &sep); + string_view token(const string_view &str, const char &sep, const size_t &at); + string_view token(const string_view &str, const char *const &sep, const size_t &at); + string_view token_last(const string_view &str, const char &sep); + string_view token_last(const string_view &str, const char *const &sep); + string_view token_first(const string_view &str, const char &sep); + 
string_view token_first(const string_view &str, const char *const &sep); + string_view tokens_after(const string_view &str, const char &sep, const size_t &at); + string_view tokens_after(const string_view &str, const char *const &sep, const size_t &at); +} + +template +size_t +ircd::tokens(const string_view &str, + const delim &sep, + string_view (&buf)[N]) +{ + const auto e(tokens(str, sep, begin(buf), end(buf))); + return std::distance(begin(buf), e); +} + +template +size_t +ircd::tokens(const string_view &str, + const delim &sep, + std::array &buf) +{ + const auto e(tokens(str, sep, begin(buf), end(buf))); + return std::distance(begin(buf), e); +} + +template +it +ircd::tokens(const string_view &str, + const delim &sep, + const it &b, + const it &e) +{ + it pos(b); + tokens(str, sep, std::distance(b, e), [&pos] + (const string_view &token) + { + *pos = token; + ++pos; + }); + + return pos; +} + +template + class C, + class T, + class Comp, + class A, + class delim> +C +ircd::tokens(const string_view &str, + const delim &sep) +{ + A allocator; + return tokens(allocator, str, sep); +} + +template + class C, + class T, + class Comp, + class A, + class delim> +C +ircd::tokens(A&& allocator, + const string_view &str, + const delim &sep) +{ + C ret(std::forward(allocator)); + tokens(str, sep, [&ret] + (const string_view &token) + { + ret.emplace(ret.end(), token); + }); + + return ret; +} + +template + class C, + class T, + class A, + class delim> +C +ircd::tokens(const string_view &str, + const delim &sep) +{ + A allocator; + return tokens(allocator, str, sep); +} + +template + class C, + class T, + class A, + class delim> +C +ircd::tokens(A&& allocator, + const string_view &str, + const delim &sep) +{ + C ret(std::forward(allocator)); + tokens(str, sep, [&ret] + (const string_view &token) + { + ret.emplace(ret.end(), token); + }); + + return ret; +} diff --git a/ircd/lexical.cc b/ircd/lexical.cc index 37224f5a5..49d968504 100644 --- a/ircd/lexical.cc +++ 
b/ircd/lexical.cc @@ -35,6 +35,11 @@ #include #include +/////////////////////////////////////////////////////////////////////////////// +// +// ircd/tokens.h +// + ircd::string_view ircd::tokens_after(const string_view &str, const char &sep, @@ -243,73 +248,46 @@ ircd::tokens(const string_view &str, std::for_each(begin(view), end(view), closure); } -ircd::string_view -ircd::b64encode(const mutable_buffer &out, - const const_raw_buffer &in) +/////////////////////////////////////////////////////////////////////////////// +// +// ircd/lex_cast.h +// + +namespace ircd { + /// The static lex_cast ring buffers are each LEX_CAST_BUFSIZE bytes; + /// Consider increasing if some lex_cast(str) has more characters. + const size_t LEX_CAST_BUFSIZE {64}; - using transform = boost::archive::iterators::transform_width; - using b64fb = boost::archive::iterators::base64_from_binary; - using ostream_iterator = boost::archive::iterators::ostream_iterator; + /// This is a static "ring buffer" to simplify a majority of lex_cast uses. + /// If the lex_cast has binary input and string output, and no user buffer + /// is supplied, the next buffer here will be used instead. The returned + /// string_view of data from this buffer is only valid for several more + /// calls to lex_cast before it is overwritten. 
+ thread_local char lex_cast_buf[LEX_CAST_BUFS][LEX_CAST_BUFSIZE]; + thread_local uint lex_cast_cur; - std::stringstream ss; - std::copy(b64fb(data(in)), b64fb(data(in) + size(in)), ostream_iterator(ss)); - const auto outlen(ss.str().copy(data(out), size(out))); - return { data(out), outlen }; + template static string_view _lex_cast(const T &i, char *buf, size_t max); + template static T _lex_cast(const string_view &s); } -ircd::const_raw_buffer -ircd::a2u(const mutable_raw_buffer &out, - const const_buffer &in) -{ - const size_t len{size(in) / 2}; - for(size_t i(0); i < len; ++i) - { - const char gl[3] - { - in[i * 2], - in[i * 2 + 1], - '\0' - }; - - out[i] = strtol(gl, nullptr, 16); - } - - return { data(out), len }; -} - -ircd::string_view -ircd::u2a(const mutable_buffer &out, - const const_raw_buffer &in) -{ - char *p(data(out)); - for(size_t i(0); i < size(in); ++i) - p += snprintf(p, size(out) - (p - data(out)), "%02x", in[i]); - - return { data(out), size_t(p - data(out)) }; -} - -namespace ircd { - -const size_t LEX_CAST_BUFSIZE {64}; -thread_local char lex_cast_buf[LEX_CAST_BUFS][LEX_CAST_BUFSIZE]; - +/// Internal template providing conversions from a number to a string; +/// potentially using the ring buffer if no user buffer is supplied. template -static string_view -_lex_cast(const T &i, - char *buf, - size_t max) +ircd::string_view +ircd::_lex_cast(const T &i, + char *buf, + size_t max) try { using array = std::array; if(!buf) { - static thread_local uint cur; - buf = lex_cast_buf[cur++]; + buf = lex_cast_buf[lex_cast_cur++]; max = LEX_CAST_BUFSIZE; - cur %= LEX_CAST_BUFS; + lex_cast_cur %= LEX_CAST_BUFS; } assert(max >= N); @@ -322,9 +300,11 @@ catch(const boost::bad_lexical_cast &e) throw ircd::bad_lex_cast("%s", e.what()); } +/// Internal template providing conversions from a string to a number; +/// the native object is returned directly; no ring buffer is consumed. 
template -static T -_lex_cast(const string_view &s) +T +ircd::_lex_cast(const string_view &s) try { return boost::lexical_cast(s); @@ -334,8 +314,6 @@ catch(const boost::bad_lexical_cast &e) throw ircd::bad_lex_cast("%s", e.what()); } -} // namespace ircd - template<> ircd::string_view ircd::lex_cast(bool i, char *const &buf, @@ -578,6 +556,57 @@ ircd::try_lex_cast(const string_view &s) return boost::conversion::try_lexical_convert(s, i); } +/////////////////////////////////////////////////////////////////////////////// +// +// ircd/stringops.h +// + +ircd::string_view +ircd::b64encode(const mutable_buffer &out, + const const_raw_buffer &in) +{ + + using transform = boost::archive::iterators::transform_width; + using b64fb = boost::archive::iterators::base64_from_binary; + using ostream_iterator = boost::archive::iterators::ostream_iterator; + + std::stringstream ss; + std::copy(b64fb(data(in)), b64fb(data(in) + size(in)), ostream_iterator(ss)); + const auto outlen(ss.str().copy(data(out), size(out))); + return { data(out), outlen }; +} + +ircd::const_raw_buffer +ircd::a2u(const mutable_raw_buffer &out, + const const_buffer &in) +{ + const size_t len{size(in) / 2}; + for(size_t i(0); i < len; ++i) + { + const char gl[3] + { + in[i * 2], + in[i * 2 + 1], + '\0' + }; + + out[i] = strtol(gl, nullptr, 16); + } + + return { data(out), len }; +} + +ircd::string_view +ircd::u2a(const mutable_buffer &out, + const const_raw_buffer &in) +{ + char *p(data(out)); + for(size_t i(0); i < size(in); ++i) + p += snprintf(p, size(out) - (p - data(out)), "%02x", in[i]); + + return { data(out), size_t(p - data(out)) }; +} + /* * strip_colour - remove colour codes from a string * -asuffield (?)