mirror of
https://github.com/matrix-construct/construct
synced 2024-12-26 15:33:54 +01:00
ircd::utf8: Add multiple lane overloads for length(); internal template; minor comments.
This commit is contained in:
parent
35bee76625
commit
c9c61124e7
2 changed files with 38 additions and 6 deletions
|
@ -20,7 +20,11 @@ namespace ircd::utf
|
|||
/// Unicode Transformation Format (8-bit)
|
||||
namespace ircd::utf8
|
||||
{
|
||||
// Get the utf8-encoded length from decoded codepoints.
|
||||
u32x16 length(const u32x16 codepoints) noexcept;
|
||||
u32x8 length(const u32x8 codepoints) noexcept;
|
||||
u32x4 length(const u32x4 codepoints) noexcept;
|
||||
|
||||
u32x16 encode(const u32x16 codepoints) noexcept;
|
||||
u32x16 decode(const u8x16 string) noexcept;
|
||||
}
|
||||
|
@ -38,7 +42,6 @@ namespace ircd::utf16
|
|||
u8x16 find_surrogate(const u8x16 input) noexcept;
|
||||
|
||||
// scan for utf-16 surrogates including incomplete sequences truncated
|
||||
// by the end of the input; also matches a single trailing slash.
|
||||
u8x16 find_surrogate_partial(const u8x16 input) noexcept;
|
||||
}
|
||||
|
||||
|
|
39
ircd/utf.cc
39
ircd/utf.cc
|
@ -29,6 +29,8 @@ ircd::utf16::truncation_table
|
|||
~shl<0x08>(~full_mask),
|
||||
};
|
||||
|
||||
/// scan for utf-16 surrogates including incomplete sequences truncated
|
||||
/// by the end of the input; also matches a single trailing slash.
|
||||
ircd::u8x16
|
||||
ircd::utf16::find_surrogate_partial(const u8x16 input)
|
||||
noexcept
|
||||
|
@ -264,21 +266,48 @@ noexcept
|
|||
;
|
||||
}
|
||||
|
||||
/// Determine the utf-8 encoding length of multiple codepoints in parallel.
|
||||
/// The input is a vector of char32_t codepoints and the output yields an integer
|
||||
/// of 0-4 for each lane.
|
||||
namespace ircd::utf8
|
||||
{
|
||||
template<class u32xN> static u32xN _length(const u32xN codepoint) noexcept;
|
||||
}
|
||||
|
||||
ircd::u32x4
|
||||
ircd::utf8::length(const u32x4 codepoint)
|
||||
noexcept
|
||||
{
|
||||
return _length(codepoint);
|
||||
}
|
||||
|
||||
ircd::u32x8
|
||||
ircd::utf8::length(const u32x8 codepoint)
|
||||
noexcept
|
||||
{
|
||||
return _length(codepoint);
|
||||
}
|
||||
|
||||
ircd::u32x16
|
||||
ircd::utf8::length(const u32x16 codepoint)
|
||||
noexcept
|
||||
{
|
||||
const u32x16
|
||||
return _length(codepoint);
|
||||
}
|
||||
|
||||
/// Determine the utf-8 encoding length of multiple codepoints in parallel.
|
||||
/// The input is a vector of char32_t codepoints and the output yields an integer
|
||||
/// of 0-4 for each lane.
|
||||
template<class u32xN>
|
||||
u32xN
|
||||
ircd::utf8::_length(const u32xN codepoint)
|
||||
noexcept
|
||||
{
|
||||
const u32xN
|
||||
length_1 { codepoint <= 0x7f },
|
||||
length_2 { codepoint <= 0x7ff && codepoint > 0x7f },
|
||||
length_3_lo { codepoint <= 0xd7ff && codepoint > 0x7ff },
|
||||
length_3_hi { codepoint <= 0xffff && codepoint > 0xdfff },
|
||||
length_4 { codepoint <= 0x10ffff && codepoint > 0xffff };
|
||||
|
||||
[[gnu::unused]] const u32x16 // Preserved here for future reference
|
||||
[[gnu::unused]] const u32xN // Preserved here for future reference
|
||||
length_3_err { codepoint <= 0xdfff && codepoint > 0xd7ff },
|
||||
length_err { (codepoint > 0x10ffff) | length_3_err };
|
||||
|
||||
|
|
Loading…
Reference in a new issue