Introduce new TIL string helpers, teach older ones to use wmemcmp (#11725)

til::equals:
At the time of writing wmemcmp() is not an intrinsic for MSVC,
but the STL uses it to implement wide string comparisons.
This produces 3x the assembly _per_ comparison and increases
runtime by 2-3x for strings of medium length (16 characters)
and 5x or more for long strings (128 characters or more).
See: https://github.com/microsoft/STL/issues/2289

Additionally a number of case insensitive, locale unaware
helpers for prefix/suffix comparisons are introduced.
This commit is contained in:
Leonard Hecker 2021-11-18 00:42:40 +01:00 committed by GitHub
parent 6742965bb8
commit a68b0d4f08
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 52 additions and 33 deletions

View File

@ -37,23 +37,3 @@ constexpr bool operator!=(const SMALL_RECT& a, const SMALL_RECT& b) noexcept
{
return !(a == b);
}
constexpr bool operator==(const std::wstring& wstr, const std::wstring_view& wstrView)
{
return (wstrView == std::wstring_view{ wstr.c_str(), wstr.size() });
}
constexpr bool operator==(const std::wstring_view& wstrView, const std::wstring& wstr)
{
return (wstr == wstrView);
}
constexpr bool operator!=(const std::wstring& wstr, const std::wstring_view& wstrView)
{
return !(wstr == wstrView);
}
constexpr bool operator!=(const std::wstring_view& wstrView, const std::wstring& wstr)
{
return !(wstr == wstrView);
}

View File

@ -1,4 +1,4 @@
// Copyright (c) Microsoft Corporation.
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#pragma once
@ -34,10 +34,7 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
template<typename T, typename Traits>
constexpr bool starts_with(const std::basic_string_view<T, Traits>& str, const std::basic_string_view<T, Traits>& prefix) noexcept
{
#ifdef __cpp_lib_starts_ends_with
#error This code can be replaced in C++20, which natively supports .starts_with().
#endif
return str.size() >= prefix.size() && Traits::compare(str.data(), prefix.data(), prefix.size()) == 0;
return str.size() >= prefix.size() && __builtin_memcmp(str.data(), prefix.data(), prefix.size() * sizeof(T)) == 0;
}
constexpr bool starts_with(const std::string_view& str, const std::string_view& prefix) noexcept
@ -52,15 +49,10 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
// std::string_view::ends_with support for C++17.
template<typename T, typename Traits>
constexpr bool ends_with(const std::basic_string_view<T, Traits>& str, const std::basic_string_view<T, Traits>& prefix) noexcept
constexpr bool ends_with(const std::basic_string_view<T, Traits>& str, const std::basic_string_view<T, Traits>& suffix) noexcept
{
#ifdef __cpp_lib_ends_ends_with
#error This code can be replaced in C++20, which natively supports .ends_with().
#endif
#pragma warning(push)
#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
return str.size() >= prefix.size() && Traits::compare(str.data() + (str.size() - prefix.size()), prefix.data(), prefix.size()) == 0;
#pragma warning(pop)
#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
return str.size() >= suffix.size() && __builtin_memcmp(str.data() + (str.size() - suffix.size()), suffix.data(), suffix.size() * sizeof(T)) == 0;
}
constexpr bool ends_with(const std::string_view& str, const std::string_view& prefix) noexcept
@ -168,6 +160,20 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
return c;
}
// Just like std::wstring_view::operator==().
//
// At the time of writing wmemcmp() is not an intrinsic for MSVC,
// but the STL uses it to implement wide string comparisons.
// This produces 3x the assembly _per_ comparison and increases
// runtime by 2-3x for strings of medium length (16 characters)
// and 5x or more for long strings (128 characters or more).
// See: https://github.com/microsoft/STL/issues/2289
template<typename T, typename Traits>
bool equals(const std::basic_string_view<T, Traits>& str1, const std::basic_string_view<T, Traits>& str2) noexcept
{
return lhs.size() == rhs.size() && __builtin_memcmp(lhs.data(), rhs.data(), lhs.size() * sizeof(T)) == 0;
}
// Just like _memicmp, but without annoying locales.
template<typename T, typename Traits>
bool equals_insensitive_ascii(const std::basic_string_view<T, Traits>& str1, const std::basic_string_view<T, Traits>& str2) noexcept
@ -205,6 +211,39 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
return equals_insensitive_ascii<>(str1, str2);
}
template<typename T, typename Traits>
constexpr bool starts_with_insensitive_ascii(const std::basic_string_view<T, Traits>& str, const std::basic_string_view<T, Traits>& prefix) noexcept
{
return str.size() >= prefix.size() && equals_insensitive_ascii<>({ str.data(), prefix.size() }, prefix);
}
constexpr bool starts_with_insensitive_ascii(const std::string_view& str, const std::string_view& prefix) noexcept
{
return starts_with_insensitive_ascii<>(str, prefix);
}
constexpr bool starts_with_insensitive_ascii(const std::wstring_view& str, const std::wstring_view& prefix) noexcept
{
return starts_with_insensitive_ascii<>(str, prefix);
}
template<typename T, typename Traits>
constexpr bool ends_with_insensitive_ascii(const std::basic_string_view<T, Traits>& str, const std::basic_string_view<T, Traits>& suffix) noexcept
{
#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
return str.size() >= suffix.size() && equals_insensitive_ascii<>({ str.data() - suffix.size(), suffix.size() }, suffix);
}
constexpr bool ends_with_insensitive_ascii(const std::string_view& str, const std::string_view& prefix) noexcept
{
return ends_with_insensitive_ascii<>(str, prefix);
}
constexpr bool ends_with_insensitive_ascii(const std::wstring_view& str, const std::wstring_view& prefix) noexcept
{
return ends_with<>(str, prefix);
}
// Give the arguments ("foo bar baz", " "), this method will
// * modify the first argument to "bar baz"
// * return "foo"