mirror of
https://github.com/matrix-construct/construct
synced 2025-01-14 16:46:50 +01:00
ircd::utf8: Add multiple lane overloads for length(); internal template; minor comments.
This commit is contained in:
parent
35bee76625
commit
c9c61124e7
2 changed files with 38 additions and 6 deletions
|
@ -20,7 +20,11 @@ namespace ircd::utf
|
||||||
/// Unicode Transformation Format (8-bit)
|
/// Unicode Transformation Format (8-bit)
|
||||||
namespace ircd::utf8
|
namespace ircd::utf8
|
||||||
{
|
{
|
||||||
|
// Get the utf8-encoded length from decoded codepoints.
|
||||||
u32x16 length(const u32x16 codepoints) noexcept;
|
u32x16 length(const u32x16 codepoints) noexcept;
|
||||||
|
u32x8 length(const u32x8 codepoints) noexcept;
|
||||||
|
u32x4 length(const u32x4 codepoints) noexcept;
|
||||||
|
|
||||||
u32x16 encode(const u32x16 codepoints) noexcept;
|
u32x16 encode(const u32x16 codepoints) noexcept;
|
||||||
u32x16 decode(const u8x16 string) noexcept;
|
u32x16 decode(const u8x16 string) noexcept;
|
||||||
}
|
}
|
||||||
|
@ -38,7 +42,6 @@ namespace ircd::utf16
|
||||||
u8x16 find_surrogate(const u8x16 input) noexcept;
|
u8x16 find_surrogate(const u8x16 input) noexcept;
|
||||||
|
|
||||||
// scan for utf-16 surrogates including incomplete sequences truncated
|
// scan for utf-16 surrogates including incomplete sequences truncated
|
||||||
// by the end of the input; also matches a single trailing slash.
|
|
||||||
u8x16 find_surrogate_partial(const u8x16 input) noexcept;
|
u8x16 find_surrogate_partial(const u8x16 input) noexcept;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
39
ircd/utf.cc
39
ircd/utf.cc
|
@ -29,6 +29,8 @@ ircd::utf16::truncation_table
|
||||||
~shl<0x08>(~full_mask),
|
~shl<0x08>(~full_mask),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// scan for utf-16 surrogates including incomplete sequences truncated
|
||||||
|
/// by the end of the input; also matches a single trailing slash.
|
||||||
ircd::u8x16
|
ircd::u8x16
|
||||||
ircd::utf16::find_surrogate_partial(const u8x16 input)
|
ircd::utf16::find_surrogate_partial(const u8x16 input)
|
||||||
noexcept
|
noexcept
|
||||||
|
@ -264,21 +266,48 @@ noexcept
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determine the utf-8 encoding length of multiple codepoints in parallel.
|
namespace ircd::utf8
|
||||||
/// The input vector char32_t codepoints and the output yields an integer
|
{
|
||||||
/// of 0-4 for each lane.
|
template<class u32xN> static u32xN _length(const u32xN codepoint) noexcept;
|
||||||
|
}
|
||||||
|
|
||||||
|
ircd::u32x4
|
||||||
|
ircd::utf8::length(const u32x4 codepoint)
|
||||||
|
noexcept
|
||||||
|
{
|
||||||
|
return _length(codepoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
ircd::u32x8
|
||||||
|
ircd::utf8::length(const u32x8 codepoint)
|
||||||
|
noexcept
|
||||||
|
{
|
||||||
|
return _length(codepoint);
|
||||||
|
}
|
||||||
|
|
||||||
ircd::u32x16
|
ircd::u32x16
|
||||||
ircd::utf8::length(const u32x16 codepoint)
|
ircd::utf8::length(const u32x16 codepoint)
|
||||||
noexcept
|
noexcept
|
||||||
{
|
{
|
||||||
const u32x16
|
return _length(codepoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Determine the utf-8 encoding length of multiple codepoints in parallel.
|
||||||
|
/// The input vector char32_t codepoints and the output yields an integer
|
||||||
|
/// of 0-4 for each lane.
|
||||||
|
template<class u32xN>
|
||||||
|
u32xN
|
||||||
|
ircd::utf8::_length(const u32xN codepoint)
|
||||||
|
noexcept
|
||||||
|
{
|
||||||
|
const u32xN
|
||||||
length_1 { codepoint <= 0x7f },
|
length_1 { codepoint <= 0x7f },
|
||||||
length_2 { codepoint <= 0x7ff && codepoint > 0x7f },
|
length_2 { codepoint <= 0x7ff && codepoint > 0x7f },
|
||||||
length_3_lo { codepoint <= 0xd7ff && codepoint > 0x7ff },
|
length_3_lo { codepoint <= 0xd7ff && codepoint > 0x7ff },
|
||||||
length_3_hi { codepoint <= 0xffff && codepoint > 0xdfff },
|
length_3_hi { codepoint <= 0xffff && codepoint > 0xdfff },
|
||||||
length_4 { codepoint <= 0x10ffff && codepoint > 0xffff };
|
length_4 { codepoint <= 0x10ffff && codepoint > 0xffff };
|
||||||
|
|
||||||
[[gnu::unused]] const u32x16 // Preserved here for future reference
|
[[gnu::unused]] const u32xN // Preserved here for future reference
|
||||||
length_3_err { codepoint <= 0xdfff && codepoint > 0xd7ff },
|
length_3_err { codepoint <= 0xdfff && codepoint > 0xd7ff },
|
||||||
length_err { (codepoint > 0x10ffff) | length_3_err };
|
length_err { (codepoint > 0x10ffff) | length_3_err };
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue