From 8aa67ccb4897e830fd45a2be0697b6459f1983d1 Mon Sep 17 00:00:00 2001
From: Jason Volk
Date: Sat, 25 May 2019 16:46:51 -0700
Subject: [PATCH] ircd: Add stringops suite for globular expression matching.

---
 include/ircd/stringops.h |  72 +++++++++++++++++++++++
 ircd/lexical.cc          | 124 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+)

diff --git a/include/ircd/stringops.h b/include/ircd/stringops.h
index c55885532..c3e2bae75 100644
--- a/include/ircd/stringops.h
+++ b/include/ircd/stringops.h
@@ -21,6 +21,10 @@ namespace ircd
 	struct igreater;
 	struct iequals;
 
+	// Globular ('*' and '?') expression utils.
+	struct gmatch;
+	struct gequals;
+
 	// Vintage
 	struct strlcpy;
 	struct strlcat;
@@ -296,6 +300,74 @@ const
 	});
 }
 
+/// Globular equals. This allows either side of the comparison to include '*'
+/// and '?' characters and equality of the string expressions will be
+/// determined. Two expressions are held equal when at least one string
+/// satisfies both: '*' stands for any run of characters (including none),
+/// '?' stands for exactly one character, and every other character is
+/// compared through tolower().
+struct ircd::gequals
+{
+	using is_transparent = std::true_type;
+
+	/// Outcome of the comparison made by the two-argument constructor; false
+	/// for a default constructed instance.
+	bool s {false};
+
+	operator const bool &() const
+	{
+		return s;
+	}
+
+	bool operator()(const string_view &a, const string_view &b) const;
+
+	/// Compares a with b right away and keeps the outcome in s.
+	template<class A,
+	         class B>
+	gequals(A&& a, B&& b)
+	:s{operator()(std::forward<A>(a), std::forward<B>(b))}
+	{}
+
+	gequals() = default;
+};
+
+/// Globular match. Similar to gequals but only one side of the comparison is
+/// considered to be the expression with '*' and '?' characters. The expression
+/// string is passed at construction. The comparison inputs are treated as
+/// non-expression strings. This allows for greater optimization than gequals.
+struct ircd::gmatch
+{
+	/// The expression. NOTE(review): assuming string_view does not own its
+	/// characters, they have to outlive every call made on this object.
+	string_view expr;
+
+	/// Outcome of the match made by the two-argument constructor; false when
+	/// no input was given at construction.
+	bool s {false};
+
+	operator const bool &() const
+	{
+		return s;
+	}
+
+	bool operator()(const string_view &a) const;
+
+	gmatch(const string_view &expr)
+	:expr{expr}
+	{}
+
+	/// Matches a against expr right away and keeps the outcome in s. The
+	/// member expr is declared ahead of s, so it is already initialized when
+	/// operator() runs in the initializer of s.
+	template<class A>
+	gmatch(const string_view &expr, A&& a)
+	:expr{expr}
+	,s{operator()(std::forward<A>(a))}
+	{}
+
+	gmatch() = default;
+};
+
 inline ircd::string_view
 ircd::trunc(const string_view &s,
             const size_t &max)
diff --git a/ircd/lexical.cc b/ircd/lexical.cc
index ddd4ae9fa..fec7efbcf 100644
--- a/ircd/lexical.cc
+++ b/ircd/lexical.cc
@@ -452,3 +452,127 @@ ircd::replace(const string_view &s,
 		return std::distance(begin(buf), p);
 	});
 }
+
+//
+// gequals
+//
+
+namespace ircd
+{
+	static bool globular_elem(const char &a, const char &b, const bool &both);
+	static bool globular_scan(const string_view &expr, const string_view &text, const bool &both);
+}
+
+/// Decide whether the expressions a and b can both describe one same string.
+/// On either side '*' stands for any run of characters (including none) and
+/// '?' stands for exactly one character; every other character is compared
+/// through tolower().
+bool
+ircd::gequals::operator()(const string_view &a, const string_view &b)
+const
+{
+	// Length of the run preceding the first '*' of each side; this equals the
+	// full size when the side has no '*' at all.
+	size_t ahead(0), bhead(0);
+	while(ahead < a.size() && a.at(ahead) != '*')
+		++ahead;
+
+	while(bhead < b.size() && b.at(bhead) != '*')
+		++bhead;
+
+	// When a side has no '*' it describes a fixed number of characters and
+	// is scanned as the text, with its '?' still standing for any character.
+	if(bhead >= b.size())
+		return globular_scan(a, b, true);
+
+	if(ahead >= a.size())
+		return globular_scan(b, a, true);
+
+	// Both sides have a '*' from here on. Whatever lies between the first
+	// and the last '*' of one side can always be absorbed by a '*' of the
+	// other side, so only the leading runs and the trailing runs have to
+	// agree over the length they share.
+	for(size_t i(0); i < ahead && i < bhead; ++i)
+		if(!globular_elem(a.at(i), b.at(i), true))
+			return false;
+
+	// Walks backward until the last '*' of either side; both sides contain
+	// one so neither index can run past the front.
+	for(size_t i(1); a.at(a.size() - i) != '*' && b.at(b.size() - i) != '*'; ++i)
+		if(!globular_elem(a.at(a.size() - i), b.at(b.size() - i), true))
+			return false;
+
+	return true;
+}
+
+/// Match text against expr, where only expr may contain '*'. The most recent
+/// '*' is remembered and on a mismatch it is made to absorb one more
+/// character of text before the scan resumes behind it. When both is set a
+/// '?' in text also stands for any character and the caller must not pass
+/// any '*' in text; when both is unset every character of text is literal.
+bool
+ircd::globular_scan(const string_view &expr,
+                    const string_view &text,
+                    const bool &both)
+{
+	// ep and tp are the cursors into expr and text. star_ep and star_tp are
+	// where both resume after the most recent '*', once starred is set.
+	size_t ep(0), tp(0), star_ep(0), star_tp(0);
+	bool starred(false);
+	while(tp < text.size())
+	{
+		if(ep < expr.size() && expr.at(ep) == '*')
+		{
+			// Start out with this '*' absorbing nothing.
+			starred = true;
+			star_ep = ++ep;
+			star_tp = tp;
+			continue;
+		}
+
+		if(ep < expr.size() && globular_elem(expr.at(ep), text.at(tp), both))
+		{
+			++ep, ++tp;
+			continue;
+		}
+
+		if(!starred)
+			return false;
+
+		// Retry with the last '*' absorbing one more character.
+		ep = star_ep;
+		tp = ++star_tp;
+	}
+
+	// The text is used up; whatever is left of expr may only be '*'.
+	while(ep < expr.size() && expr.at(ep) == '*')
+		++ep;
+
+	return ep == expr.size();
+}
+
+/// Compare one element of each side. A '?' in a always matches; a '?' in b
+/// matches only when both is set. Anything else is compared via tolower().
+bool
+ircd::globular_elem(const char &a,
+                    const char &b,
+                    const bool &both)
+{
+	if(a == '?' || (both && b == '?'))
+		return true;
+
+	return tolower(a) == tolower(b);
+}
+
+//
+// gmatch
+//
+
+/// Match the input a against the stored expr. Only expr is interpreted; any
+/// '*' or '?' inside a is an ordinary character, as the class documents.
+bool
+ircd::gmatch::operator()(const string_view &a)
+const
+{
+	return globular_scan(expr, a, false);
+}