0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2025-01-02 10:54:16 +01:00
construct/include/ircd/gpt/token.h

80 lines
1.8 KiB
C
Raw Normal View History

2021-03-30 03:22:42 +02:00
// Matrix Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
#pragma once
2021-04-02 22:01:38 +02:00
#define HAVE_IRCD_GPT_TOKEN_H
2021-03-30 03:22:42 +02:00
2022-06-20 03:59:29 +02:00
// Forward declaration of the token wrapper. The class-key here is `class` to
// agree with the definition (`class ircd::gpt::token`); a struct/class
// mismatch between a declaration and its definition triggers
// -Wmismatched-tags on Clang and C4099 on MSVC.
namespace ircd::gpt
{
	class token;
}
2021-03-30 03:22:42 +02:00
2022-06-20 03:59:29 +02:00
/// Token is just a 16-bit index into the vocabulary. This lightweight wrapper
/// convenience constructs from a string lookup or from a u16 directly.
class ircd::gpt::token
2021-03-30 03:22:42 +02:00
{
2022-06-20 03:59:29 +02:00
uint16_t val;
2021-03-30 03:22:42 +02:00
2022-06-20 03:59:29 +02:00
public:
operator const uint16_t &() const;
operator uint16_t &();
2021-03-30 03:22:42 +02:00
2022-06-20 03:59:29 +02:00
operator string_view() const;
2022-06-20 03:59:29 +02:00
token(const_buffer &buf) noexcept;
token(const string_view &, const bool prefix_space = false);
2022-06-20 03:59:29 +02:00
token(const uint16_t &) noexcept;
2021-03-30 03:22:42 +02:00
};
2022-06-20 03:59:29 +02:00
static_assert(sizeof(ircd::gpt::token) == sizeof(uint16_t));
static_assert(std::is_standard_layout<ircd::gpt::token>::value);
/// Direct construction from a vocabulary index; no lookup is performed and
/// the value is stored as given.
inline
ircd::gpt::token::token(const uint16_t &index)
noexcept
:val
{
	index
}
{
}
/// Must resolve to one token or error thrown.
/// prefix_space=true internally prepends space for potentially better token.
2022-06-20 03:59:29 +02:00
inline
ircd::gpt::token::token(const string_view &str,
const bool prefix_space)
:val{vocab::tokenize(str, prefix_space)}
2022-06-20 03:59:29 +02:00
{}
/// Construct from the front of a buffer by passing it to vocab::tokenize().
/// The input for one token is consumed off the front of `buf`, which is taken
/// by non-const reference.
inline
ircd::gpt::token::token(const_buffer &buf)
noexcept
:val
{
	vocab::tokenize(buf)
}
{
}
/// Convert to a string_view by indexing vocab::token with this token's
/// stored value.
inline
ircd::gpt::token::operator string_view()
const
{
	return vocab::token[this->val];
}
2022-01-07 20:03:11 +01:00
2022-06-20 03:59:29 +02:00
/// Mutable access to the stored index; writes through the returned reference
/// change this token's value.
inline
ircd::gpt::token::operator uint16_t &()
{
	return this->val;
}
2022-01-07 20:03:11 +01:00
2022-06-20 03:59:29 +02:00
/// Read-only access to the stored index.
inline
ircd::gpt::token::operator const uint16_t &()
const
{
	return this->val;
}