Optimize wide string comparisons with memcmp

This commit is contained in:
Leonard Hecker 2021-11-10 17:28:42 +01:00
parent 7db7ba1ac9
commit 0a1ee7eabf
3 changed files with 134 additions and 33 deletions

View file

@ -37,23 +37,3 @@ constexpr bool operator!=(const SMALL_RECT& a, const SMALL_RECT& b) noexcept
{
return !(a == b);
}
// Compares a std::wstring with a std::wstring_view by wrapping the string's
// buffer (c_str() plus size()) in a view and comparing the two views.
// NOTE(review): std::wstring::c_str()/size() only became constexpr in C++20, so
// before that this function cannot be constant-evaluated — confirm the
// `constexpr` is intended for the language version in use.
constexpr bool operator==(const std::wstring& wstr, const std::wstring_view& wstrView)
{
return (wstrView == std::wstring_view{ wstr.c_str(), wstr.size() });
}
// Same comparison with the operand order reversed (view on the left, string on
// the right); evaluates `wstr == wstrView`.
constexpr bool operator==(const std::wstring_view& wstrView, const std::wstring& wstr)
{
return (wstr == wstrView);
}
// Inequality of a std::wstring and a std::wstring_view, expressed as the
// negation of `wstr == wstrView`.
constexpr bool operator!=(const std::wstring& wstr, const std::wstring_view& wstrView)
{
return !(wstr == wstrView);
}
// Inequality with the operand order reversed (view on the left, string on the
// right); also the negation of `wstr == wstrView`.
constexpr bool operator!=(const std::wstring_view& wstrView, const std::wstring& wstr)
{
return !(wstr == wstrView);
}

View file

@ -1,8 +1,10 @@
// Copyright (c) Microsoft Corporation.
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#pragma once
#include "string_ext.h"
namespace til // Terminal Implementation Library. Also: "Today I Learned"
{
_TIL_INLINEPREFIX std::wstring visualize_control_codes(std::wstring str) noexcept
@ -34,10 +36,7 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
// Returns true when `str` is at least as long as `prefix` and its first
// prefix.size() characters match `prefix`.
// NOTE(review): this hunk (-34,10 +36,7) lists replaced and replacement lines
// back to back, so the #error guard and two return statements all appear here.
// As written only the first return (Traits::compare) is reachable; the
// __builtin_memcmp line is its replacement.
// NOTE(review): the __builtin_memcmp variant compares raw bytes and does not
// consult Traits — confirm no caller relies on a custom traits type.
template<typename T, typename Traits>
constexpr bool starts_with(const std::basic_string_view<T, Traits>& str, const std::basic_string_view<T, Traits>& prefix) noexcept
{
#ifdef __cpp_lib_starts_ends_with
#error This code can be replaced in C++20, which natively supports .starts_with().
#endif
return str.size() >= prefix.size() && Traits::compare(str.data(), prefix.data(), prefix.size()) == 0;
return str.size() >= prefix.size() && __builtin_memcmp(str.data(), prefix.data(), prefix.size() * sizeof(T)) == 0;
}
constexpr bool starts_with(const std::string_view& str, const std::string_view& prefix) noexcept
@ -52,15 +51,10 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
// std::string_view::ends_with support for C++17.
// Returns true when `str` is at least as long as the second argument and its
// trailing characters match it.
// NOTE(review): this hunk (-52,15 +51,10) lists replaced and replacement lines
// back to back: the signature appears twice (`prefix` renamed to `suffix`), and
// the push/disable/pop + Traits::compare variant is followed by the
// suppress + __builtin_memcmp variant.
// NOTE(review): the guard below tests `__cpp_lib_ends_ends_with`, whereas
// starts_with tests `__cpp_lib_starts_ends_with`; this looks like a typo that
// keeps the #error from ever firing — confirm.
// NOTE(review): the __builtin_memcmp variant compares raw bytes and does not
// consult Traits — confirm no caller relies on a custom traits type.
template<typename T, typename Traits>
constexpr bool ends_with(const std::basic_string_view<T, Traits>& str, const std::basic_string_view<T, Traits>& prefix) noexcept
constexpr bool ends_with(const std::basic_string_view<T, Traits>& str, const std::basic_string_view<T, Traits>& suffix) noexcept
{
#ifdef __cpp_lib_ends_ends_with
#error This code can be replaced in C++20, which natively supports .ends_with().
#endif
#pragma warning(push)
#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
return str.size() >= prefix.size() && Traits::compare(str.data() + (str.size() - prefix.size()), prefix.data(), prefix.size()) == 0;
#pragma warning(pop)
#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
return str.size() >= suffix.size() && __builtin_memcmp(str.data() + (str.size() - suffix.size()), suffix.data(), suffix.size() * sizeof(T)) == 0;
}
constexpr bool ends_with(const std::string_view& str, const std::string_view& prefix) noexcept
@ -205,6 +199,39 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
return equals_insensitive_ascii<>(str1, str2);
}
// Returns true when `str` is at least as long as `prefix` and the leading
// prefix.size() characters of `str` compare equal to `prefix` according to
// equals_insensitive_ascii<>().
template<typename T, typename Traits>
constexpr bool starts_with_insensitive_ascii(const std::basic_string_view<T, Traits>& str, const std::basic_string_view<T, Traits>& prefix) noexcept
{
    // A string shorter than the prefix cannot start with it.
    if (str.size() < prefix.size())
    {
        return false;
    }

    // View over just the leading characters that have to match.
    const std::basic_string_view<T, Traits> head{ str.data(), prefix.size() };
    return equals_insensitive_ascii<>(head, prefix);
}
// std::string_view overload: forwards both arguments unchanged to the
// starts_with_insensitive_ascii<> template.
constexpr bool starts_with_insensitive_ascii(const std::string_view& str, const std::string_view& prefix) noexcept
{
return starts_with_insensitive_ascii<>(str, prefix);
}
// std::wstring_view overload: forwards both arguments unchanged to the
// starts_with_insensitive_ascii<> template.
constexpr bool starts_with_insensitive_ascii(const std::wstring_view& str, const std::wstring_view& prefix) noexcept
{
return starts_with_insensitive_ascii<>(str, prefix);
}
// Returns true when `str` is at least as long as `suffix` and the trailing
// suffix.size() characters of `str` compare equal to `suffix` according to
// equals_insensitive_ascii<>().
template<typename T, typename Traits>
constexpr bool ends_with_insensitive_ascii(const std::basic_string_view<T, Traits>& str, const std::basic_string_view<T, Traits>& suffix) noexcept
{
    // The tail starts (str.size() - suffix.size()) characters into `str`.
    // The size check runs first, so the subtraction cannot underflow and the
    // resulting pointer always stays inside the viewed range.
#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
    return str.size() >= suffix.size() && equals_insensitive_ascii<>({ str.data() + (str.size() - suffix.size()), suffix.size() }, suffix);
}

// std::string_view overload: forwards to the ends_with_insensitive_ascii<> template.
constexpr bool ends_with_insensitive_ascii(const std::string_view& str, const std::string_view& suffix) noexcept
{
    return ends_with_insensitive_ascii<>(str, suffix);
}

// std::wstring_view overload: forwards to the ends_with_insensitive_ascii<> template
// (not to the case-sensitive ends_with<>).
constexpr bool ends_with_insensitive_ascii(const std::wstring_view& str, const std::wstring_view& suffix) noexcept
{
    return ends_with_insensitive_ascii<>(str, suffix);
}
// Given the arguments ("foo bar baz", " "), this method will
// * modify the first argument to "bar baz"
// * return "foo"

94
src/inc/til/string_ext.h Normal file
View file

@ -0,0 +1,94 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#pragma once
// At the time of writing wmemcmp() is not an intrinsic for MSVC,
// but the STL uses it to implement wide string comparisons.
// This produces 3x the assembly _per_ comparison and increases
// runtime by 2-3x for strings of medium length (16 characters)
// and 5x or more for long strings (128 characters or more).
// See: https://github.com/microsoft/STL/issues/2289
//
// Using the memcmp-based overloads below instead reduces the binary
// size of conhost by about 0.25% (or 3kB). They have no performance
// advantage in the general case, but trivially prevent us from
// running into the slowdown described above in edge cases.
// Equality of two wide string views: true when both have the same length and
// their character data is byte-for-byte identical.
constexpr bool operator==(const std::wstring_view& lhs, const std::wstring_view& rhs) noexcept
{
    // Views of different lengths can never be equal; skip the memory compare.
    if (lhs.size() != rhs.size())
    {
        return false;
    }

    // Same length: compare the raw bytes of both character ranges.
    const auto byteCount = lhs.size() * sizeof(wchar_t);
    return __builtin_memcmp(lhs.data(), rhs.data(), byteCount) == 0;
}
// Equality of a wide string view (left) and a wide string (right): true when
// both have the same length and their character data is byte-for-byte identical.
_CONSTEXPR20 bool operator==(const std::wstring_view& lhs, const std::wstring& rhs) noexcept
{
    // Different lengths can never be equal; skip the memory compare.
    if (lhs.size() != rhs.size())
    {
        return false;
    }

    const auto byteCount = lhs.size() * sizeof(wchar_t);
    return __builtin_memcmp(lhs.data(), rhs.data(), byteCount) == 0;
}
// Equality of a wide string (left) and a wide string view (right): true when
// both have the same length and their character data is byte-for-byte identical.
// The parameters are named after their actual position (left operand = lhs).
_CONSTEXPR20 bool operator==(const std::wstring& lhs, const std::wstring_view& rhs) noexcept
{
    return lhs.size() == rhs.size() && __builtin_memcmp(lhs.data(), rhs.data(), lhs.size() * sizeof(wchar_t)) == 0;
}
// Equality of two wide strings: true when both have the same length and their
// character data is byte-for-byte identical.
_CONSTEXPR20 bool operator==(const std::wstring& lhs, const std::wstring& rhs) noexcept
{
    // Different lengths can never be equal; skip the memory compare.
    if (lhs.size() != rhs.size())
    {
        return false;
    }

    const auto byteCount = lhs.size() * sizeof(wchar_t);
    return __builtin_memcmp(lhs.data(), rhs.data(), byteCount) == 0;
}
// Equality of a wide string view and a null-terminated wide C string.
// `rhs` is wrapped in a std::wstring_view (which measures it up to the
// terminating null) and the two views are then compared.
constexpr bool operator==(const std::wstring_view& lhs, const wchar_t* rhs) noexcept
{
    const std::wstring_view rhsView{ rhs };
    return lhs == rhsView;
}
// Equality of a null-terminated wide C string (left) and a wide string view (right).
// `lhs` is wrapped in a std::wstring_view (which measures it up to the
// terminating null) and the two views are then compared.
// `rhs` is taken by const reference so that const views and temporaries are
// accepted too, matching the other overloads in this file.
constexpr bool operator==(const wchar_t* lhs, const std::wstring_view& rhs) noexcept
{
    return std::wstring_view{ lhs } == rhs;
}
// Equality of a wide string and a null-terminated wide C string.
// `rhs` is wrapped in a std::wstring_view (which measures it up to the
// terminating null) and then compared against `lhs`.
_CONSTEXPR20 bool operator==(const std::wstring& lhs, const wchar_t* rhs) noexcept
{
    const std::wstring_view rhsView{ rhs };
    return lhs == rhsView;
}
// Equality of a null-terminated wide C string (left) and a wide string (right).
// `lhs` is wrapped in a std::wstring_view (which measures it up to the
// terminating null) and then compared against `rhs`.
// `rhs` is taken by const reference so that const strings and temporaries are
// accepted too, matching the other overloads in this file.
_CONSTEXPR20 bool operator==(const wchar_t* lhs, const std::wstring& rhs) noexcept
{
    return std::wstring_view{ lhs } == rhs;
}
// Inequality of two wide string views: true when the lengths differ or the
// character data differs in at least one byte.
constexpr bool operator!=(const std::wstring_view& lhs, const std::wstring_view& rhs) noexcept
{
    // A length mismatch already settles it; skip the memory compare.
    if (lhs.size() != rhs.size())
    {
        return true;
    }

    // Same length: compare the raw bytes of both character ranges.
    const auto byteCount = lhs.size() * sizeof(wchar_t);
    return __builtin_memcmp(lhs.data(), rhs.data(), byteCount) != 0;
}
// Inequality of a wide string view (left) and a wide string (right): true when
// the lengths differ or the character data differs in at least one byte.
_CONSTEXPR20 bool operator!=(const std::wstring_view& lhs, const std::wstring& rhs) noexcept
{
    // A length mismatch already settles it; skip the memory compare.
    if (lhs.size() != rhs.size())
    {
        return true;
    }

    const auto byteCount = lhs.size() * sizeof(wchar_t);
    return __builtin_memcmp(lhs.data(), rhs.data(), byteCount) != 0;
}
// Inequality of a wide string (left) and a wide string view (right): true when
// the lengths differ or the character data differs in at least one byte.
// The parameters are named after their actual position (left operand = lhs).
_CONSTEXPR20 bool operator!=(const std::wstring& lhs, const std::wstring_view& rhs) noexcept
{
    return lhs.size() != rhs.size() || __builtin_memcmp(lhs.data(), rhs.data(), lhs.size() * sizeof(wchar_t)) != 0;
}
// Inequality of two wide strings: true when the lengths differ or the
// character data differs in at least one byte.
_CONSTEXPR20 bool operator!=(const std::wstring& lhs, const std::wstring& rhs) noexcept
{
    // A length mismatch already settles it; skip the memory compare.
    if (lhs.size() != rhs.size())
    {
        return true;
    }

    const auto byteCount = lhs.size() * sizeof(wchar_t);
    return __builtin_memcmp(lhs.data(), rhs.data(), byteCount) != 0;
}
// Inequality of a wide string view and a null-terminated wide C string.
// `rhs` is wrapped in a std::wstring_view (which measures it up to the
// terminating null) and the two views are then compared.
constexpr bool operator!=(const std::wstring_view& lhs, const wchar_t* rhs) noexcept
{
    const std::wstring_view rhsView{ rhs };
    return lhs != rhsView;
}
// Inequality of a null-terminated wide C string (left) and a wide string view (right).
// `lhs` is wrapped in a std::wstring_view (which measures it up to the
// terminating null) and the two views are then compared.
// `rhs` is taken by const reference so that const views and temporaries are
// accepted too, matching the other overloads in this file.
constexpr bool operator!=(const wchar_t* lhs, const std::wstring_view& rhs) noexcept
{
    return std::wstring_view{ lhs } != rhs;
}
// Inequality of a wide string and a null-terminated wide C string.
// `rhs` is wrapped in a std::wstring_view (which measures it up to the
// terminating null) and then compared against `lhs`.
_CONSTEXPR20 bool operator!=(const std::wstring& lhs, const wchar_t* rhs) noexcept
{
    const std::wstring_view rhsView{ rhs };
    return lhs != rhsView;
}
// Inequality of a null-terminated wide C string (left) and a wide string (right).
// `lhs` is wrapped in a std::wstring_view (which measures it up to the
// terminating null) and then compared against `rhs`.
// `rhs` is taken by const reference so that const strings and temporaries are
// accepted too, matching the other overloads in this file.
_CONSTEXPR20 bool operator!=(const wchar_t* lhs, const std::wstring& rhs) noexcept
{
    return std::wstring_view{ lhs } != rhs;
}