0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2025-01-09 14:25:56 +01:00
construct/include/ircd/gpt/model.h

126 lines
2.4 KiB
C
Raw Normal View History

2021-03-05 02:03:33 +01:00
// Tensor Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
#pragma once
#define HAVE_IRCD_GPT_MODEL_H
/// Declarations for the GPT model parameter structures and the default
/// resources associated with them. Only declarations live here; the extern
/// objects are defined elsewhere.
namespace ircd::gpt::model
{
	// Forward declarations of the structures making up the model.
	struct norm;
	struct attn;
	struct ffnn;
	struct block;
	struct embed;
	struct decoder;

	// Forward declarations of the JSON property names and the tuple using them.
	struct prop;
	struct text;

	// Default instances; declared extern here, so no storage is created by
	// including this header.
	extern decoder *default_model;
	extern float *default_moment[2];
	extern float *default_checkpoint[3];
	extern string_view default_dataset;
	extern std::vector<json::object> default_data;

	/// Alignment value passed to the alignas() specifiers on the parameter
	/// members of the model structures and on the decoder itself.
	/// NOTE(review): 4096 looks like a page size -- confirm the intent.
	constexpr auto alignment {4096};

	// Configuration item; its name and default are set at its definition,
	// which is not in this header.
	extern conf::item<bool> cache_shared;
}
2022-06-20 03:59:29 +02:00
/// Layer normalization
///
/// Holds the two parameter vectors of a normalization layer. Each member is
/// individually aligned to model::alignment. The member order (bias first,
/// then weight) is part of the structure's layout and must not be changed.
struct ircd::gpt::model::norm
{
	// Bias vector.
	union ircd_gpt_vector bias alignas(alignment);

	// Weight vector.
	union ircd_gpt_vector weight alignas(alignment);
};
2021-03-05 02:03:33 +01:00
/// Attention aperture
///
/// Parameters of one attention unit: a normalization layer followed by two
/// bias/weight pairs. Every bias and weight member is aligned to
/// model::alignment. Member order is part of the layout.
struct ircd::gpt::model::attn
{
	// Normalization parameters belonging to this unit.
	model::norm
	norm;

	// First bias/weight pair; the weight is an array of 768 elements.
	// NOTE(review): "fcon" presumably abbreviates "fully connected" -- confirm.
	union ircd_gpt_attn_aperature
	fcon_bias alignas(alignment),
	fcon_weight alignas(alignment) [768];

	// Second bias/weight pair; the weight is an array of 768 vectors.
	// NOTE(review): "proj" presumably denotes the output projection -- confirm.
	union ircd_gpt_vector
	proj_bias alignas(alignment),
	proj_weight alignas(alignment) [768];
};
/// Feed-forward neural network
///
/// Parameters of one feed-forward unit: a normalization layer followed by two
/// bias/weight pairs. Every bias and weight member is aligned to
/// model::alignment. Member order is part of the layout.
struct ircd::gpt::model::ffnn
{
	// Normalization parameters belonging to this unit.
	model::norm
	norm;

	// First bias/weight pair; the weight is an array of 768 elements of the
	// ffnn-specific union type.
	union ircd_gpt_ffnn_aperature
	fcon_bias alignas(alignment),
	fcon_weight alignas(alignment) [768];

	// Second bias/weight pair; the weight is an array of 3072 vectors.
	union ircd_gpt_vector
	proj_bias alignas(alignment),
	proj_weight alignas(alignment) [3072];
};
/// Transformer block
///
/// Groups the parameters of one attention unit and one feed-forward unit.
/// The attention parameters are laid out first, then the feed-forward
/// parameters.
struct ircd::gpt::model::block
{
	// Attention parameters of this block.
	model::attn
	attn;

	// Feed-forward parameters of this block.
	model::ffnn
	ffnn;
};
2021-03-05 02:03:33 +01:00
/// Vocabulary embeddings
struct ircd::gpt::model::embed
2021-03-05 02:03:33 +01:00
{
2022-06-20 03:59:29 +02:00
model::norm
norm;
union ircd_gpt_vector
pos alignas(alignment) [1024],
token alignas(alignment) [65536];
};
2022-06-20 03:59:29 +02:00
/// Transformer decoder
///
/// The complete parameter set: twelve transformer blocks followed by the
/// embeddings. The structure itself is aligned to model::alignment.
struct alignas(ircd::gpt::model::alignment)
ircd::gpt::model::decoder
{
	// The transformer blocks, stored contiguously.
	model::block
	layer[12];

	// Embedding tables, laid out after the last block.
	model::embed
	embed;
};
2022-06-20 03:59:29 +02:00
/// Property-name constants used as the keys of model::text. Each constant is
/// a pointer to the string literal of the same spelling.
struct ircd::gpt::model::prop
{
	static constexpr const char *const ended {"ended"};
	static constexpr const char *const id {"id"};
	static constexpr const char *const length {"length"};
	static constexpr const char *const text {"text"};
};
/// JSON tuple with four properties keyed by the names in model::prop:
/// "ended" (bool), "id" (uint), "length" (uint) and "text" (json::string).
struct ircd::gpt::model::text
	: json::tuple
	<
		json::property<prop::ended, bool>,
		json::property<prop::id, uint>,
		json::property<prop::length, uint>,
		json::property<prop::text, json::string>
	>
{
	// NOTE(review): super_type is presumably provided by json::tuple as an
	// alias of the base, making this an inheriting-constructor declaration --
	// confirm against the json::tuple definition.
	using super_type::tuple;
};