ircd::utf: Simplify interfaces with weak specializations for vector widths; template inline.

2024-06-02 18:18:56 +02:00 · 2020-09-03 17:44:39 -07:00 · 2020-09-03 17:44:39 -07:00 · 4a5d6066fe
parent 43afc1a9a7
commit 4a5d6066fe
2 changed files with 32 additions and 20 deletions
--- a/include/ircd/utf.h
+++ b/include/ircd/utf.h
@ -21,14 +21,10 @@ namespace ircd::utf
 namespace ircd::utf8
 {
 	// Get the utf8-encoded length from char32_t (decoded) codepoints
-	u32x16 length(const u32x16 codepoints) noexcept;
-	u32x8 length(const u32x8 codepoints) noexcept;
-	u32x4 length(const u32x4 codepoints) noexcept;
+	template<class u32xN> u32xN length(const u32xN codepoints) noexcept;

 	// Encode char32_t codepoints into respective utf-8 encodings
-	u32x16 encode(const u32x16 codepoints) noexcept;
-	u32x8 encode(const u32x8 codepoints) noexcept;
-	u32x4 encode(const u32x4 codepoints) noexcept;
+	template<class u32xN> u32xN encode(const u32xN codepoints) noexcept;

 	// Decode utf-8 string into char32_t unicode codepoints
 	u32x16 decode(const u8x16 string) noexcept;
@ -38,10 +34,10 @@ namespace ircd::utf8
 namespace ircd::utf16
 {
 	// mask all surrogate characters from find_() result
-	u8x16 mask_surrogate(const u8x16 found) noexcept;
+	template<class u8xN> u8xN mask_surrogate(const u8xN found) noexcept;

 	// scan for utf-16 surrogates
-	u8x16 find_surrogate(const u8x16 input) noexcept;
+	template<class u8xN> u8xN find_surrogate(const u8xN input) noexcept;

 	// scan for utf-16 surrogates including incomplete sequences truncated
 	u8x16 find_surrogate_partial(const u8x16 input) noexcept;
@ -50,11 +46,15 @@ namespace ircd::utf16
 	u32x4 decode_surrogate_aligned_next(const u8x16 input) noexcept;
 }

-inline ircd::u8x16
-ircd::utf16::mask_surrogate(const u8x16 found)
+/// The vector returned by find_surrogate() only sets the mask at the leading
+/// character of each valid surrogate (i.e. the '\' which begins the escape).
+/// This convenience expands such a result to mask the full surrogate.
+template<class u8xN>
+inline u8xN
+ircd::utf16::mask_surrogate(const u8xN found)
 noexcept
 {
-	return u8x16
+	return u8xN
 	{
 		shl<0x08>(found) |
 		shl<0x10>(found) |
--- a/ircd/utf.cc
+++ b/ircd/utf.cc
@ -252,23 +252,31 @@ noexcept
 	return ret;
 }

-ircd::u8x16
-ircd::utf16::find_surrogate(const u8x16 input)
+namespace ircd::utf16
+{
+	template u8x16 utf16::find_surrogate<u8x16>(const u8x16) noexcept;
+	template u8x32 utf16::find_surrogate<u8x32>(const u8x32) noexcept;
+	template u8x64 utf16::find_surrogate<u8x64>(const u8x64) noexcept;
+}
+
+template<class u8xN>
+u8xN
+ircd::utf16::find_surrogate(const u8xN input)
 noexcept
 {
-	const u8x16 hex_nibble[3]
+	const u8xN hex_nibble[3]
 	{
 		input >= '0' && input <= '9',
 		input >= 'A' && input <= 'F',
 		input >= 'a' && input <= 'f',
 	};

-	const u8x16 is_hex_nibble
+	const u8xN is_hex_nibble
 	{
 		hex_nibble[0] | hex_nibble[1] | hex_nibble[2]
 	};

-	const auto is_surrogate
+	const u8xN is_surrogate
 	{
 		(input == '\\') &
 		shr<8>(input == 'u') &
@ -366,10 +374,10 @@ noexcept

 namespace ircd::utf8
 {
-	template<class u32xN>
-	static u32xN _encode(const u32xN codepoint) noexcept;
+	template<class u32xN> static u32xN _encode(const u32xN codepoint) noexcept;
 }

+template<>
 ircd::u32x4
 ircd::utf8::encode(const u32x4 codepoint)
 noexcept
@ -377,6 +385,7 @@ noexcept
 	return _encode(codepoint);
 }

+template<>
 ircd::u32x8
 ircd::utf8::encode(const u32x8 codepoint)
 noexcept
@ -403,6 +412,7 @@ noexcept
 }
 #endif

+template<>
 ircd::u32x16
 ircd::utf8::encode(const u32x16 codepoint)
 noexcept
@ -474,10 +484,10 @@ noexcept

 namespace ircd::utf8
 {
-	template<class u32xN>
-	static u32xN _length(const u32xN codepoint) noexcept;
+	template<class u32xN> static u32xN _length(const u32xN codepoint) noexcept;
 }

+template<>
 ircd::u32x4
 ircd::utf8::length(const u32x4 codepoint)
 noexcept
@ -485,6 +495,7 @@ noexcept
 	return _length(codepoint);
 }

+template<>
 ircd::u32x8
 ircd::utf8::length(const u32x8 codepoint)
 noexcept
@ -511,6 +522,7 @@ noexcept
 }
 #endif

+template<>
 ircd::u32x16
 ircd::utf8::length(const u32x16 codepoint)
 noexcept