mirror of
https://github.com/matrix-construct/construct
synced 2024-06-02 18:18:56 +02:00
ircd::utf: Simplify interfaces with weak specializations for vector widths; template inline.
This commit is contained in:
parent
43afc1a9a7
commit
4a5d6066fe
|
@ -21,14 +21,10 @@ namespace ircd::utf
|
|||
namespace ircd::utf8
|
||||
{
|
||||
// Get the utf8-encoded length from char32_t (decoded) codepoints
|
||||
u32x16 length(const u32x16 codepoints) noexcept;
|
||||
u32x8 length(const u32x8 codepoints) noexcept;
|
||||
u32x4 length(const u32x4 codepoints) noexcept;
|
||||
template<class u32xN> u32xN length(const u32xN codepoints) noexcept;
|
||||
|
||||
// Encode char32_t codepoints into respective utf-8 encodings
|
||||
u32x16 encode(const u32x16 codepoints) noexcept;
|
||||
u32x8 encode(const u32x8 codepoints) noexcept;
|
||||
u32x4 encode(const u32x4 codepoints) noexcept;
|
||||
template<class u32xN> u32xN encode(const u32xN codepoints) noexcept;
|
||||
|
||||
// Decode utf-8 string into char32_t unicode codepoints
|
||||
u32x16 decode(const u8x16 string) noexcept;
|
||||
|
@ -38,10 +34,10 @@ namespace ircd::utf8
|
|||
namespace ircd::utf16
|
||||
{
|
||||
// mask all surrogate characters from find_() result
|
||||
u8x16 mask_surrogate(const u8x16 found) noexcept;
|
||||
template<class u8xN> u8xN mask_surrogate(const u8xN found) noexcept;
|
||||
|
||||
// scan for utf-16 surrogates
|
||||
u8x16 find_surrogate(const u8x16 input) noexcept;
|
||||
template<class u8xN> u8xN find_surrogate(const u8xN input) noexcept;
|
||||
|
||||
// scan for utf-16 surrogates, including incomplete (truncated) sequences
|
||||
u8x16 find_surrogate_partial(const u8x16 input) noexcept;
|
||||
|
@ -50,11 +46,15 @@ namespace ircd::utf16
|
|||
u32x4 decode_surrogate_aligned_next(const u8x16 input) noexcept;
|
||||
}
|
||||
|
||||
inline ircd::u8x16
|
||||
ircd::utf16::mask_surrogate(const u8x16 found)
|
||||
/// The vector returned by find_surrogate() masks the leading character of
|
||||
/// every valid surrogate (i.e. the '\'). This is a convenience to mask
|
||||
/// the full surrogate from such a result.
|
||||
template<class u8xN>
|
||||
inline u8xN
|
||||
ircd::utf16::mask_surrogate(const u8xN found)
|
||||
noexcept
|
||||
{
|
||||
return u8x16
|
||||
return u8xN
|
||||
{
|
||||
shl<0x08>(found) |
|
||||
shl<0x10>(found) |
|
||||
|
|
30
ircd/utf.cc
30
ircd/utf.cc
|
@ -252,23 +252,31 @@ noexcept
|
|||
return ret;
|
||||
}
|
||||
|
||||
ircd::u8x16
|
||||
ircd::utf16::find_surrogate(const u8x16 input)
|
||||
// Explicit instantiation definitions of the find_surrogate<>() function
// template for the u8x16, u8x32 and u8x64 vector types. The template itself
// is defined further down in this file.
// NOTE(review): presumably these exist so that callers which only see the
// template declaration can link against these three widths -- confirm.
namespace ircd::utf16
|
||||
{
|
||||
template u8x16 utf16::find_surrogate<u8x16>(const u8x16) noexcept;
|
||||
template u8x32 utf16::find_surrogate<u8x32>(const u8x32) noexcept;
|
||||
template u8x64 utf16::find_surrogate<u8x64>(const u8x64) noexcept;
|
||||
}
|
||||
|
||||
template<class u8xN>
|
||||
u8xN
|
||||
ircd::utf16::find_surrogate(const u8xN input)
|
||||
noexcept
|
||||
{
|
||||
const u8x16 hex_nibble[3]
|
||||
const u8xN hex_nibble[3]
|
||||
{
|
||||
input >= '0' && input <= '9',
|
||||
input >= 'A' && input <= 'F',
|
||||
input >= 'a' && input <= 'f',
|
||||
};
|
||||
|
||||
const u8x16 is_hex_nibble
|
||||
const u8xN is_hex_nibble
|
||||
{
|
||||
hex_nibble[0] | hex_nibble[1] | hex_nibble[2]
|
||||
};
|
||||
|
||||
const auto is_surrogate
|
||||
const u8xN is_surrogate
|
||||
{
|
||||
(input == '\\') &
|
||||
shr<8>(input == 'u') &
|
||||
|
@ -366,10 +374,10 @@ noexcept
|
|||
|
||||
// Forward declaration of the static (internal-linkage) function template
// _encode(): takes a vector of type u32xN by value and returns a u32xN.
// It is called as `_encode(codepoint)` further down in this file.
// NOTE(review): the declaration appears twice below (a two-line form and a
// one-line form); this looks like the before/after of a diff rather than an
// intentional redeclaration -- confirm.
namespace ircd::utf8
|
||||
{
|
||||
template<class u32xN>
|
||||
static u32xN _encode(const u32xN codepoint) noexcept;
|
||||
template<class u32xN> static u32xN _encode(const u32xN codepoint) noexcept;
|
||||
}
|
||||
|
||||
template<>
|
||||
ircd::u32x4
|
||||
ircd::utf8::encode(const u32x4 codepoint)
|
||||
noexcept
|
||||
|
@ -377,6 +385,7 @@ noexcept
|
|||
return _encode(codepoint);
|
||||
}
|
||||
|
||||
template<>
|
||||
ircd::u32x8
|
||||
ircd::utf8::encode(const u32x8 codepoint)
|
||||
noexcept
|
||||
|
@ -403,6 +412,7 @@ noexcept
|
|||
}
|
||||
#endif
|
||||
|
||||
template<>
|
||||
ircd::u32x16
|
||||
ircd::utf8::encode(const u32x16 codepoint)
|
||||
noexcept
|
||||
|
@ -474,10 +484,10 @@ noexcept
|
|||
|
||||
// Forward declaration of the static (internal-linkage) function template
// _length(): takes a vector of type u32xN by value and returns a u32xN.
// It is called as `_length(codepoint)` further down in this file.
// NOTE(review): the declaration appears twice below (a two-line form and a
// one-line form); this looks like the before/after of a diff rather than an
// intentional redeclaration -- confirm.
namespace ircd::utf8
|
||||
{
|
||||
template<class u32xN>
|
||||
static u32xN _length(const u32xN codepoint) noexcept;
|
||||
template<class u32xN> static u32xN _length(const u32xN codepoint) noexcept;
|
||||
}
|
||||
|
||||
template<>
|
||||
ircd::u32x4
|
||||
ircd::utf8::length(const u32x4 codepoint)
|
||||
noexcept
|
||||
|
@ -485,6 +495,7 @@ noexcept
|
|||
return _length(codepoint);
|
||||
}
|
||||
|
||||
template<>
|
||||
ircd::u32x8
|
||||
ircd::utf8::length(const u32x8 codepoint)
|
||||
noexcept
|
||||
|
@ -511,6 +522,7 @@ noexcept
|
|||
}
|
||||
#endif
|
||||
|
||||
template<>
|
||||
ircd::u32x16
|
||||
ircd::utf8::length(const u32x16 codepoint)
|
||||
noexcept
|
||||
|
|
Loading…
Reference in a new issue