2020-06-27 14:27:16 -07:00
|
|
|
// The Construct
|
|
|
|
//
|
|
|
|
// Copyright (C) The Construct Developers, Authors & Contributors
|
2021-08-11 04:31:32 -07:00
|
|
|
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
2020-06-27 14:27:16 -07:00
|
|
|
//
|
|
|
|
// Permission to use, copy, modify, and/or distribute this software for any
|
|
|
|
// purpose with or without fee is hereby granted, provided that the above
|
|
|
|
// copyright notice and this permission notice is present in all copies. The
|
|
|
|
// full license for this software is available in the LICENSE file.
|
|
|
|
|
|
|
|
#pragma once
|
2021-08-11 04:31:32 -07:00
|
|
|
#define HAVE_IRCD_UTF16_H
|
2020-06-27 14:27:16 -07:00
|
|
|
|
|
|
|
/// Unicode Transformation Format (16-bit)
|
|
|
|
namespace ircd::utf16
|
|
|
|
{
|
2020-06-29 19:23:54 -07:00
|
|
|
// mask all surrogate characters from find_() result
|
2020-09-03 17:44:39 -07:00
|
|
|
template<class u8xN> u8xN mask_surrogate(const u8xN found) noexcept;
|
2020-06-29 19:23:54 -07:00
|
|
|
|
|
|
|
// scan for utf-16 surrogates
|
2020-09-03 17:44:39 -07:00
|
|
|
template<class u8xN> u8xN find_surrogate(const u8xN input) noexcept;
|
2020-06-29 19:23:54 -07:00
|
|
|
|
|
|
|
// scan for utf-16 surrogates including incomplete sequences truncated
|
|
|
|
u8x16 find_surrogate_partial(const u8x16 input) noexcept;
|
2020-07-02 16:39:14 -07:00
|
|
|
|
|
|
|
// decodes one or two surrogates at the front into one or two codepoints
|
|
|
|
u32x4 decode_surrogate_aligned_next(const u8x16 input) noexcept;
|
2020-06-29 19:23:54 -07:00
|
|
|
}
|
|
|
|
|
2020-09-03 17:44:39 -07:00
|
|
|
/// The vector returned by find_surrogate() masks the leading character of
|
|
|
|
/// every valid surrogate (i.e. the '\'). This is a convenience to mask
|
|
|
|
/// the full surrogate from such a result.
|
|
|
|
template<class u8xN>
|
|
|
|
inline u8xN
|
|
|
|
ircd::utf16::mask_surrogate(const u8xN found)
|
2020-06-29 19:23:54 -07:00
|
|
|
noexcept
|
|
|
|
{
|
2020-09-03 17:44:39 -07:00
|
|
|
return u8xN
|
2020-06-29 19:23:54 -07:00
|
|
|
{
|
2020-07-08 17:26:48 -07:00
|
|
|
shl<0x08>(found) |
|
|
|
|
shl<0x10>(found) |
|
|
|
|
shl<0x18>(found) |
|
|
|
|
shl<0x20>(found) |
|
|
|
|
shl<0x28>(found) |
|
2020-06-29 19:23:54 -07:00
|
|
|
found
|
|
|
|
};
|
2020-06-27 14:27:16 -07:00
|
|
|
}
|