0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-12-26 15:33:54 +01:00

ircd::utf: Internal template for utf8::encode() w/ additional lane choices for interface.

This commit is contained in:
Jason Volk 2020-07-03 14:00:09 -07:00
parent d6d4c0b80a
commit 76a16469fb
2 changed files with 40 additions and 8 deletions

View file

@ -20,12 +20,17 @@ namespace ircd::utf
/// Unicode Transformation Format (8-bit)
namespace ircd::utf8
{
	/// Utf-8 encoded length of char32_t (decoded) codepoints.
	u32x4 length(const u32x4 codepoints) noexcept;
	u32x8 length(const u32x8 codepoints) noexcept;
	u32x16 length(const u32x16 codepoints) noexcept;

	/// Utf-8 encodings of char32_t codepoints.
	u32x4 encode(const u32x4 codepoints) noexcept;
	u32x8 encode(const u32x8 codepoints) noexcept;
	u32x16 encode(const u32x16 codepoints) noexcept;

	/// char32_t unicode codepoints decoded from a utf-8 string.
	u32x16 decode(const u8x16 string) noexcept;
}

View file

@ -296,32 +296,59 @@ noexcept
return integers;
}
/// Transform multiple char32_t codepoints to their utf-8 encodings in
/// parallel, returning a sparse result in each char32_t (this does not
/// compress the result down).
namespace ircd::utf8
{
	// Internal worker template; forward-declared here so the public encode()
	// overloads defined below can call it before its definition appears.
	template<class u32xN>
	static u32xN
	_encode(const u32xN codepoint)
	noexcept;
}
/// u32x4 overload of utf8::encode(); hands the codepoints to the internal
/// _encode() template and returns its result unchanged.
ircd::u32x4
ircd::utf8::encode(const u32x4 codepoint)
noexcept
{
	const u32x4 encoded
	{
		_encode(codepoint)
	};

	return encoded;
}
/// u32x8 overload of utf8::encode(); hands the codepoints to the internal
/// _encode() template and returns its result unchanged.
ircd::u32x8
ircd::utf8::encode(const u32x8 codepoint)
noexcept
{
	const u32x8 encoded
	{
		_encode(codepoint)
	};

	return encoded;
}
/// u32x16 overload of utf8::encode(). Like the u32x4 and u32x8 overloads,
/// this is a thin wrapper: the codepoints are forwarded to the internal
/// _encode() template and its result is returned unchanged.
ircd::u32x16
ircd::utf8::encode(const u32x16 codepoint)
noexcept
{
	// The length computation formerly started here now lives inside
	// _encode(); no local state is needed in the wrapper.
	return _encode(codepoint);
}
/// Transform multiple char32_t codepoints to their utf-8 encodings in
/// parallel, returning a sparse result in each char32_t (this does not
/// compress the result down).
template<class u32xN>
u32xN
ircd::utf8::_encode(const u32xN codepoint)
noexcept
{
const u32xN len
{
length(codepoint)
};
const u32x16 enc_2
const u32xN enc_2
{
(((codepoint >> 6) | 0xc0) & 0xff) // byte[0]
| ((((codepoint & 0x3f) | 0x80) &0xff) << 8) // byte[1]
};
const u32x16 enc_3
const u32xN enc_3
{
(((codepoint >> 12) | 0xe0) & 0xff) | // byte[0]
(((((codepoint >> 6) & 0x3f) | 0x80) & 0xff) << 8) | // byte[1]
((((codepoint & 0x3f) | 0x80) & 0xff) << 16) // byte[3]
};
const u32x16 enc_4
const u32xN enc_4
{
(((codepoint >> 18) | 0xf0) & 0xff) | // byte[0]
(((((codepoint >> 12) & 0x3f) | 0x80) & 0xff) << 8) | // byte[1]