2021-03-30 03:22:42 +02:00
|
|
|
// Matrix Construct
|
|
|
|
//
|
|
|
|
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
|
|
|
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
|
|
|
//
|
|
|
|
// Permission to use, copy, modify, and/or distribute this software for any
|
|
|
|
// purpose with or without fee is hereby granted, provided that the above
|
|
|
|
// copyright notice and this permission notice is present in all copies. The
|
|
|
|
// full license for this software is available in the LICENSE file.
|
|
|
|
|
|
|
|
#pragma once
|
2021-04-02 22:01:38 +02:00
|
|
|
#define HAVE_IRCD_GPT_TOKEN_H
|
2021-03-30 03:22:42 +02:00
|
|
|
|
2022-06-20 03:59:29 +02:00
|
|
|
// Forward declaration. The class-key is `class` so that it agrees with the
// definition of ircd::gpt::token in this header; mixing `struct` and `class`
// for the same type is diagnosed by some compilers (mismatched tags).
namespace ircd::gpt
{
	class token;
}
|
2021-03-30 03:22:42 +02:00
|
|
|
|
2022-06-20 03:59:29 +02:00
|
|
|
/// Token is just a 16-bit index into the vocabulary. This lightweight wrapper
|
|
|
|
/// convenience constructs from a string lookup or from a u16 directly.
|
|
|
|
class ircd::gpt::token
|
2021-03-30 03:22:42 +02:00
|
|
|
{
|
2022-06-20 03:59:29 +02:00
|
|
|
uint16_t val;
|
2021-03-30 03:22:42 +02:00
|
|
|
|
2022-06-20 03:59:29 +02:00
|
|
|
public:
|
|
|
|
operator const uint16_t &() const;
|
|
|
|
operator uint16_t &();
|
2021-03-30 03:22:42 +02:00
|
|
|
|
2022-06-20 03:59:29 +02:00
|
|
|
operator string_view() const;
|
2021-04-11 04:28:23 +02:00
|
|
|
|
2022-06-20 03:59:29 +02:00
|
|
|
token(const_buffer &buf) noexcept;
|
2022-07-02 01:33:09 +02:00
|
|
|
token(const string_view &, const bool prefix_space = false);
|
2022-06-20 03:59:29 +02:00
|
|
|
token(const uint16_t &) noexcept;
|
2021-03-30 03:22:42 +02:00
|
|
|
};
|
|
|
|
|
2022-06-20 03:59:29 +02:00
|
|
|
static_assert(sizeof(ircd::gpt::token) == sizeof(uint16_t));
|
|
|
|
static_assert(std::is_standard_layout<ircd::gpt::token>::value);
|
|
|
|
|
|
|
|
/// Direct construction; no lookup
|
|
|
|
inline
|
|
|
|
ircd::gpt::token::token(const uint16_t &val)
|
|
|
|
noexcept
|
|
|
|
:val{val}
|
|
|
|
{}
|
|
|
|
|
|
|
|
/// Must resolve to one token or error thrown.
|
2022-07-02 01:33:09 +02:00
|
|
|
/// prefix_space=true internally prepends space for potentially better token.
|
2022-06-20 03:59:29 +02:00
|
|
|
inline
|
2022-07-02 01:33:09 +02:00
|
|
|
ircd::gpt::token::token(const string_view &str,
|
|
|
|
const bool prefix_space)
|
|
|
|
:val{vocab::tokenize(str, prefix_space)}
|
2022-06-20 03:59:29 +02:00
|
|
|
{}
|
|
|
|
|
|
|
|
/// Consumes input for one token off front of buf
|
|
|
|
inline
|
|
|
|
ircd::gpt::token::token(const_buffer &buf)
|
|
|
|
noexcept
|
|
|
|
:val{vocab::tokenize(buf)}
|
|
|
|
{}
|
|
|
|
|
|
|
|
/// Converts to the vocab::token entry selected by the stored index.
inline
ircd::gpt::token::operator string_view() const
{
	return vocab::token[this->val];
}
|
2022-01-07 20:03:11 +01:00
|
|
|
|
2022-06-20 03:59:29 +02:00
|
|
|
/// Mutable access to the stored index.
inline
ircd::gpt::token::operator uint16_t &()
{
	return this->val;
}
|
2022-01-07 20:03:11 +01:00
|
|
|
|
2022-06-20 03:59:29 +02:00
|
|
|
/// Read-only access to the stored index.
inline
ircd::gpt::token::operator const uint16_t &() const
{
	return this->val;
}
|