0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2025-01-13 08:23:56 +01:00

ircd::utf: Adjust vector types/syntaxes for GCC.

ircd::json: Adjust vector types for GCC.
This commit is contained in:
Jason Volk 2020-07-09 18:53:02 -07:00
parent b1a60451b7
commit 72a61a7426
2 changed files with 115 additions and 86 deletions

View file

@ -3308,9 +3308,9 @@ noexcept
};
block_t block
{
(
_mm_loadu_si128(si)
};
);
const u64x2 consume
{
@ -3371,19 +3371,19 @@ ircd::json::string_stringify(u8x16 &__restrict__ block,
const u8x16 block_mask)
{
const u8x16 is_esc
{
(
block == '\\'
};
);
const u8x16 is_quote
{
(
block == '"'
};
);
const u8x16 is_ctrl
{
(
block < 0x20
};
);
const u8x16 is_special
{
@ -3478,19 +3478,19 @@ ircd::json::string_stringify_utf16(u8x16 &__restrict__ block,
};
const u32x4 is_surrogate
{
(
utf16::find_surrogate(block & block_mask)
};
);
const u32x4 surrogate_mask
{
(
is_surrogate != 0U
};
);
const u32x4 is_ctrl
{
(
unicode < 0x20
};
);
const u32x4 length_encoded
{
@ -3508,10 +3508,10 @@ ircd::json::string_stringify_utf16(u8x16 &__restrict__ block,
u32(ctrl_tab_len[ctrl_idx[1]]),
};
const u128x1 is_non_bmp
{
const u32x4 is_non_bmp
(
unicode >= 0x10000U
};
);
const u32x4 is_surrogate_pair
{
@ -3533,9 +3533,9 @@ ircd::json::string_stringify_utf16(u8x16 &__restrict__ block,
};
const u8x16 encoded
{
(
encoded_sparse
};
);
size_t di(0);
for(size_t i(0); i < 2; ++i)
@ -3602,19 +3602,19 @@ ircd::json::string_serialized(const u8x16 block,
const u8x16 block_mask)
{
const u8x16 is_esc
{
(
block == '\\'
};
);
const u8x16 is_quote
{
(
block == '"'
};
);
const u8x16 is_ctrl
{
(
block < 0x20
};
);
const u8x16 is_special
{
@ -3689,14 +3689,14 @@ ircd::json::string_serialized_utf16(const u8x16 block,
const u8x16 block_mask)
{
const u32x4 is_surrogate
{
(
utf16::find_surrogate(block & block_mask)
};
);
const u32x4 surrogate_mask
{
(
is_surrogate != 0U
};
);
const u32x4 unicode
{
@ -3704,9 +3704,9 @@ ircd::json::string_serialized_utf16(const u8x16 block,
};
const u32x4 is_ctrl
{
(
unicode < 0x20
};
);
const u32x4 length_encoded
{
@ -3725,9 +3725,9 @@ ircd::json::string_serialized_utf16(const u8x16 block,
};
const u32x4 is_non_bmp
{
(
unicode >= 0x10000U
};
);
const u32x4 is_surrogate_pair
{

View file

@ -35,13 +35,13 @@ noexcept
| ((input - 0x61 + 0x0a) & is_hex[2])
};
const u128x1 is_hex_nibble
const u8x16 is_hex_nibble
{
is_hex[0] | is_hex[1] | is_hex[2]
};
// Masks the starting byte (the '\' char) of each valid surrogate.
const u32x4 is_surrogate
const u8x16 is_surrogate
{
(input == '\\') &
shr<8>(input == 'u') &
@ -56,9 +56,9 @@ noexcept
// matching those 6 byte inputs, so we shift the byte[6] over to byte[4]
// and stiffen the mask about to be generated.
const u32x4 surrogate_mask
{
((is_surrogate & 0xff) | (is_surrogate >> 16)) == 0xffU
};
(
((u32x4(is_surrogate) & 0xff) | (u32x4(is_surrogate) >> 16)) == 0xffU
);
// Decide if one or two surrogates were actually input and assert that
// between both lanes if so.
@ -89,7 +89,7 @@ noexcept
// Result for one or two unpaired surrogates
const u32x4 codepoint_unpaired
{
(
u8x16
{
hex_byte[2], hex_byte[0], 0, 0,
@ -97,19 +97,19 @@ noexcept
0, 0, 0, 0,
0, 0, 0, 0,
}
};
);
// Determine if the unpaired codepoints can make a surrogate pair
const u32x4 surrogate_pair_range
{
(
codepoint_unpaired >= 0xd800U && codepoint_unpaired <= 0xdfffU
};
);
// Mask lane[0] if the codepoints are actually a surrogate pair
const u32x4 surrogate_paired
{
(
surrogate_pair_range & shr<32>(surrogate_pair_range)
};
);
// Pre-processing shuffle for surrogate pair decode
const u32x4 codepoint_pre_paired
@ -127,9 +127,9 @@ noexcept
// Decide if the codepoint is in the supplementary plane (3+ bytes)
const u32x4 codepoint_high
{
(
(codepoint_paired > 0xffffU) & surrogate_paired
};
);
// Decide if the codepoint is in the BMP (2- bytes)
const u32x4 codepoint_low
@ -165,7 +165,7 @@ noexcept
namespace ircd::utf16
{
static const u128x1 full_mask {~u128x1{0}};
extern const u128x1 truncation_table[6];
extern const u8x16 truncation_table[6];
}
decltype(ircd::utf16::truncation_table)
@ -185,24 +185,29 @@ ircd::u8x16
ircd::utf16::find_surrogate_partial(const u8x16 input)
noexcept
{
const u128x1 is_esc
{
const u8x16 is_esc
(
input == '\\'
};
);
const u128x1 is_u
{
const u8x16 is_u
(
input == 'u'
};
);
const u128x1 is_hex_nibble
const u8x16 hex_nibble[3]
{
(input >= '0' && input <= '9') ||
(input >= 'A' && input <= 'F') ||
(input >= 'a' && input <= 'f')
input >= '0' && input <= '9',
input >= 'A' && input <= 'F',
input >= 'a' && input <= 'f',
};
const u128x1 surrogate_sans[6]
const u8x16 is_hex_nibble
{
hex_nibble[0] | hex_nibble[1] | hex_nibble[2]
};
const u8x16 surrogate_sans[6]
{
// complete
is_esc
@ -234,7 +239,7 @@ noexcept
is_esc,
};
const u128x1 ret
const u8x16 ret
{
(surrogate_sans[0] & truncation_table[0]) |
(surrogate_sans[1] & truncation_table[1]) |
@ -251,11 +256,16 @@ ircd::u8x16
ircd::utf16::find_surrogate(const u8x16 input)
noexcept
{
const u128x1 is_hex_nibble
const u8x16 hex_nibble[3]
{
(input >= '0' && input <= '9') ||
(input >= 'A' && input <= 'F') ||
(input >= 'a' && input <= 'f')
input >= '0' && input <= '9',
input >= 'A' && input <= 'F',
input >= 'a' && input <= 'f',
};
const u8x16 is_hex_nibble
{
hex_nibble[0] | hex_nibble[1] | hex_nibble[2]
};
const auto is_surrogate
@ -300,24 +310,24 @@ ircd::utf8::decode(const u8x16 string)
noexcept
{
const u32x16 in
{
simd::lane_cast<u32x16>(string)
};
(
simd::lane_cast<u32x16, u8x16>(string)
);
const u32x16 is_single
{
(
(in & 0x80) == 0
};
);
const u32x16 is_lead
{
(
(in - 0xc2) <= 0x32
};
);
const u32x16 is_trail
{
(
in >= 0x80 && in < 0xbf
};
);
const u32x16 expect_trail
{
@ -477,22 +487,41 @@ u32xN
ircd::utf8::_length(const u32xN codepoint)
noexcept
{
const u32xN
length_1 { codepoint <= 0x7f },
length_2 { codepoint <= 0x7ff && codepoint > 0x7f },
length_3_lo { codepoint <= 0xd7ff && codepoint > 0x7ff },
length_3_hi { codepoint <= 0xffff && codepoint > 0xdfff },
length_4 { codepoint <= 0x10ffff && codepoint > 0xffff };
const u32xN len[5]
{
// length 1
codepoint <= 0x7f,
[[gnu::unused]] const u32xN // Preserved here for future reference
length_3_err { codepoint <= 0xdfff && codepoint > 0xd7ff },
length_err { (codepoint > 0x10ffff) | length_3_err };
// length 2
codepoint <= 0x7ff && codepoint > 0x7f,
// length 3 low
codepoint <= 0xd7ff && codepoint > 0x7ff,
// length 3 high
codepoint <= 0xffff && codepoint > 0xdfff,
// length 4
codepoint <= 0x10ffff && codepoint > 0xffff,
};
[[gnu::unused]] // Preserved here for future reference
const u32xN len_3_err
(
codepoint <= 0xdfff && codepoint > 0xd7ff
);
[[gnu::unused]] // Preserved here for future reference
const u32xN len_err
{
(codepoint > 0x10ffff) | len_3_err
};
return 0
| (length_1 & 1)
| (length_2 & 2)
| (length_3_lo & 3)
| (length_3_hi & 3)
| (length_4 & 4)
| (len[0] & 1)
| (len[1] & 2)
| (len[2] & 3)
| (len[3] & 3)
| (len[4] & 4)
;
}