0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-06-02 18:18:56 +02:00

ircd::utf: Simplify interfaces with weak specializations for vector widths; template inline.

This commit is contained in:
Jason Volk 2020-09-03 17:44:39 -07:00
parent 43afc1a9a7
commit 4a5d6066fe
2 changed files with 32 additions and 20 deletions

View file

@ -21,14 +21,10 @@ namespace ircd::utf
namespace ircd::utf8
{
// Get the utf8-encoded length from char32_t (decoded) codepoints
u32x16 length(const u32x16 codepoints) noexcept;
u32x8 length(const u32x8 codepoints) noexcept;
u32x4 length(const u32x4 codepoints) noexcept;
template<class u32xN> u32xN length(const u32xN codepoints) noexcept;
// Encode char32_t codepoints into respective utf-8 encodings
u32x16 encode(const u32x16 codepoints) noexcept;
u32x8 encode(const u32x8 codepoints) noexcept;
u32x4 encode(const u32x4 codepoints) noexcept;
template<class u32xN> u32xN encode(const u32xN codepoints) noexcept;
// Decode utf-8 string into char32_t unicode codepoints
u32x16 decode(const u8x16 string) noexcept;
@ -38,10 +34,10 @@ namespace ircd::utf8
namespace ircd::utf16
{
// mask all surrogate characters from find_() result
u8x16 mask_surrogate(const u8x16 found) noexcept;
template<class u8xN> u8xN mask_surrogate(const u8xN found) noexcept;
// scan for utf-16 surrogates
u8x16 find_surrogate(const u8x16 input) noexcept;
template<class u8xN> u8xN find_surrogate(const u8xN input) noexcept;
// scan for utf-16 surrogates, including incomplete (truncated) sequences
u8x16 find_surrogate_partial(const u8x16 input) noexcept;
@ -50,11 +46,15 @@ namespace ircd::utf16
u32x4 decode_surrogate_aligned_next(const u8x16 input) noexcept;
}
inline ircd::u8x16
ircd::utf16::mask_surrogate(const u8x16 found)
/// The vector returned by find_surrogate() masks the leading character of
/// every valid surrogate (i.e. the '\'). This is a convenience to mask
/// the full surrogate from such a result.
template<class u8xN>
inline u8xN
ircd::utf16::mask_surrogate(const u8xN found)
noexcept
{
return u8x16
return u8xN
{
shl<0x08>(found) |
shl<0x10>(found) |

View file

@ -252,23 +252,31 @@ noexcept
return ret;
}
ircd::u8x16
ircd::utf16::find_surrogate(const u8x16 input)
// Explicit instantiation definitions of the find_surrogate() function
// template for the u8x16, u8x32 and u8x64 vector types. The template's
// definition follows immediately after this namespace block.
// NOTE(review): presumably these are the only widths other translation
// units can link against, since the header only declares the template
// without a body -- confirm before adding callers with other vector types.
namespace ircd::utf16
{
// Instantiate for the u8x16 vector type.
template u8x16 utf16::find_surrogate<u8x16>(const u8x16) noexcept;
// Instantiate for the u8x32 vector type.
template u8x32 utf16::find_surrogate<u8x32>(const u8x32) noexcept;
// Instantiate for the u8x64 vector type.
template u8x64 utf16::find_surrogate<u8x64>(const u8x64) noexcept;
}
template<class u8xN>
u8xN
ircd::utf16::find_surrogate(const u8xN input)
noexcept
{
const u8x16 hex_nibble[3]
const u8xN hex_nibble[3]
{
input >= '0' && input <= '9',
input >= 'A' && input <= 'F',
input >= 'a' && input <= 'f',
};
const u8x16 is_hex_nibble
const u8xN is_hex_nibble
{
hex_nibble[0] | hex_nibble[1] | hex_nibble[2]
};
const auto is_surrogate
const u8xN is_surrogate
{
(input == '\\') &
shr<8>(input == 'u') &
@ -366,10 +374,10 @@ noexcept
namespace ircd::utf8
{
template<class u32xN>
static u32xN _encode(const u32xN codepoint) noexcept;
template<class u32xN> static u32xN _encode(const u32xN codepoint) noexcept;
}
template<>
ircd::u32x4
ircd::utf8::encode(const u32x4 codepoint)
noexcept
@ -377,6 +385,7 @@ noexcept
return _encode(codepoint);
}
template<>
ircd::u32x8
ircd::utf8::encode(const u32x8 codepoint)
noexcept
@ -403,6 +412,7 @@ noexcept
}
#endif
template<>
ircd::u32x16
ircd::utf8::encode(const u32x16 codepoint)
noexcept
@ -474,10 +484,10 @@ noexcept
namespace ircd::utf8
{
template<class u32xN>
static u32xN _length(const u32xN codepoint) noexcept;
template<class u32xN> static u32xN _length(const u32xN codepoint) noexcept;
}
template<>
ircd::u32x4
ircd::utf8::length(const u32x4 codepoint)
noexcept
@ -485,6 +495,7 @@ noexcept
return _length(codepoint);
}
template<>
ircd::u32x8
ircd::utf8::length(const u32x8 codepoint)
noexcept
@ -511,6 +522,7 @@ noexcept
}
#endif
template<>
ircd::u32x16
ircd::utf8::length(const u32x16 codepoint)
noexcept