mirror of
https://github.com/matrix-construct/construct
synced 2024-12-28 00:14:07 +01:00
124 lines
2.3 KiB
C++
124 lines
2.3 KiB
C++
// Tensor Construct
|
|
//
|
|
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
|
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
|
//
|
|
// Permission to use, copy, modify, and/or distribute this software for any
|
|
// purpose with or without fee is hereby granted, provided that the above
|
|
// copyright notice and this permission notice is present in all copies. The
|
|
// full license for this software is available in the LICENSE file.
|
|
|
|
#pragma once
|
|
#define HAVE_IRCD_GPT_MODEL_H
|
|
|
|
namespace ircd::gpt::model
|
|
{
|
|
struct norm;
|
|
struct attn;
|
|
struct ffnn;
|
|
struct block;
|
|
struct embed;
|
|
struct decoder;
|
|
|
|
struct prop;
|
|
struct text;
|
|
|
|
extern decoder *default_model;
|
|
extern float *default_moment[2];
|
|
extern float *default_checkpoint[3];
|
|
extern string_view default_dataset;
|
|
extern std::vector<json::object> default_data;
|
|
|
|
constexpr auto alignment {4096};
|
|
}
|
|
|
|
/// Layer normalization
|
|
struct ircd::gpt::model::norm
|
|
{
|
|
union ircd_gpt_vector
|
|
bias alignas(alignment),
|
|
weight alignas(alignment);
|
|
};
|
|
|
|
/// Attention aperature
|
|
struct ircd::gpt::model::attn
|
|
{
|
|
model::norm
|
|
norm;
|
|
|
|
union ircd_gpt_attn_aperature
|
|
fcon_bias alignas(alignment),
|
|
fcon_weight alignas(alignment) [768];
|
|
|
|
union ircd_gpt_vector
|
|
proj_bias alignas(alignment),
|
|
proj_weight alignas(alignment) [768];
|
|
};
|
|
|
|
/// Feed-forward neural network
|
|
struct ircd::gpt::model::ffnn
|
|
{
|
|
model::norm
|
|
norm;
|
|
|
|
union ircd_gpt_ffnn_aperature
|
|
fcon_bias alignas(alignment),
|
|
fcon_weight alignas(alignment) [768];
|
|
|
|
union ircd_gpt_vector
|
|
proj_bias alignas(alignment),
|
|
proj_weight alignas(alignment) [3072];
|
|
};
|
|
|
|
/// Transformer block
|
|
struct ircd::gpt::model::block
|
|
{
|
|
model::attn
|
|
attn;
|
|
|
|
model::ffnn
|
|
ffnn;
|
|
};
|
|
|
|
/// Vocabulary embeddings
|
|
struct ircd::gpt::model::embed
|
|
{
|
|
model::norm
|
|
norm;
|
|
|
|
union ircd_gpt_vector
|
|
pos alignas(alignment) [1024],
|
|
token alignas(alignment) [65536];
|
|
};
|
|
|
|
/// Transformer decoder
|
|
struct alignas(ircd::gpt::model::alignment)
|
|
ircd::gpt::model::decoder
|
|
{
|
|
model::block
|
|
layer[12];
|
|
|
|
model::embed
|
|
embed;
|
|
};
|
|
|
|
struct ircd::gpt::model::prop
|
|
{
|
|
static constexpr const char
|
|
*const ended {"ended"},
|
|
*const id {"id"},
|
|
*const length {"length"},
|
|
*const text {"text"};
|
|
};
|
|
|
|
struct ircd::gpt::model::text
|
|
:json::tuple
|
|
<
|
|
json::property<prop::ended, bool>,
|
|
json::property<prop::id, uint>,
|
|
json::property<prop::length, uint>,
|
|
json::property<prop::text, json::string>
|
|
>
|
|
{
|
|
using super_type::tuple;
|
|
};
|