mirror of
https://github.com/matrix-construct/construct
synced 2024-06-10 22:18:54 +02:00
ircd::gpt: Add task struct; mmap cached model directly; improve init.
This commit is contained in:
parent
6f3adfd160
commit
4da7d2ae43
27
include/ircd/gpt/generate.h
Normal file
27
include/ircd/gpt/generate.h
Normal file
|
@ -0,0 +1,27 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_GPT_GENERATE_H
|
||||
|
||||
namespace ircd::gpt
|
||||
{
|
||||
/// Token interface. Generates tokens into `out` using the tokens of `in`
/// as the initial context. The options pointer defaults to the address of
/// default_opts; the task pointer is optional and defaults to null.
/// NOTE(review): the returned view presumably covers the tokens written to
/// `out`; the return statement is not visible in this header -- confirm.
vector_view<u16>
|
||||
generate(const vector_view<u16> &out,
|
||||
const vector_view<const u16> &in,
|
||||
const opts * = &default_opts,
|
||||
task * = nullptr);
|
||||
|
||||
/// String interface. Takes the input as text and an output buffer instead
/// of token views; the definition in gpt.cc passes the same opts and task
/// pointers on to the token interface above.
/// NOTE(review): the returned string_view presumably refers to text placed
/// in `out` -- confirm against the definition.
string_view
|
||||
generate(const mutable_buffer &out,
|
||||
const string_view &in,
|
||||
const opts * = &default_opts,
|
||||
task * = nullptr);
|
||||
}
|
|
@ -18,25 +18,23 @@ namespace ircd::gpt
|
|||
IRCD_EXCEPTION(ircd::error, error)
|
||||
|
||||
struct opts;
|
||||
struct context;
|
||||
struct task;
|
||||
|
||||
extern const opts default_opts;
|
||||
extern log::log log;
|
||||
|
||||
vector_view<u16>
|
||||
generate(const vector_view<u16> &out,
|
||||
const vector_view<const u16> &in,
|
||||
const opts & = default_opts);
|
||||
|
||||
string_view
|
||||
generate(const mutable_buffer &out,
|
||||
const string_view &in,
|
||||
const opts & = default_opts);
|
||||
}
|
||||
|
||||
#include "vocab.h"
|
||||
#include "model.h"
|
||||
#include "task.h"
|
||||
#include "generate.h"
|
||||
|
||||
/// Primary Options
|
||||
///
|
||||
/// Use this structure to configure and control specifics of the machine.
|
||||
/// These settings are immutable for the operations. To maintain state between
|
||||
/// calls see task.h
|
||||
///
|
||||
struct ircd::gpt::opts
|
||||
{
|
||||
/// Specifies the nominal halting condition based on the sequence of
|
||||
|
@ -64,9 +62,21 @@ struct ircd::gpt::opts
|
|||
|
||||
/// Limit number of output tokens. Default of -1 is unlimited; the number
|
||||
/// of tokens generated will be limited by other factors.
|
||||
uint limit {-1U};
|
||||
uint limit
|
||||
{
|
||||
-1U
|
||||
};
|
||||
|
||||
/// Flip random coins over the top k logits each round. Setting to 1
|
||||
/// deterministically selects the top logit.
|
||||
uint top_k {2};
|
||||
uint top_k
|
||||
{
|
||||
2
|
||||
};
|
||||
|
||||
/// Pointer to the model
|
||||
const model::decoder *model
|
||||
{
|
||||
model::default_model
|
||||
};
|
||||
};
|
||||
|
|
|
@ -18,6 +18,8 @@ namespace ircd::gpt::model
|
|||
struct ffnn;
|
||||
struct block;
|
||||
struct decoder;
|
||||
|
||||
extern const decoder *default_model;
|
||||
}
|
||||
|
||||
/// Attention aperture
|
||||
|
|
57
include/ircd/gpt/task.h
Normal file
57
include/ircd/gpt/task.h
Normal file
|
@ -0,0 +1,57 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_GPT_TASK_H
|
||||
|
||||
/// Context to maintain state across calls. Every member has a default
|
||||
/// initializer, so a default-constructed task starts with a null options
/// pointer, a '\0' status and all counters at zero.
///
|
||||
struct ircd::gpt::task
|
||||
{
|
||||
/// Status codes for this task; declared here with an underlying type of
/// char and defined after the struct.
enum status :char;
|
||||
|
||||
/// Pointer to the attached options; null by default.
|
||||
const gpt::opts *opts {nullptr};
|
||||
|
||||
/// Current task status. Initialized to '\0', which is not one of the
/// named status values.
|
||||
enum status status {'\0'};
|
||||
|
||||
/// Accumulates the number of executions by the user. Each call to the
|
||||
/// interface is an execution.
|
||||
uint64_t epoch {0};
|
||||
|
||||
/// Accumulates the number of tokens produced by the task. Several tokens
|
||||
/// may be produced each epoch.
|
||||
uint64_t produced {0};
|
||||
|
||||
/// Accumulates the number of tokens witnessed by the task. The number of
|
||||
/// tokens in the context for each produced token is counted as witnessed.
|
||||
uint64_t witnessed {0};
|
||||
|
||||
/// Accumulates the number of CPU reference cycles consumed by the task.
|
||||
/// This counter does not reflect time when the task is queued or waiting
|
||||
/// or offloaded to a co-processor/accelerator.
|
||||
uint64_t cycles {0};
|
||||
|
||||
/// Accumulates the total time in milliseconds over all executions of the
|
||||
/// task. This counter reflects total wall-clock time of all phases of
|
||||
/// the execution.
|
||||
milliseconds time {0ms};
|
||||
};
|
||||
|
||||
/// The current status of a task is indicated with intelligible characters.
|
||||
/// Each enumerator's value is the capital initial letter of its name, held
/// in the enum's underlying type of char.
enum ircd::gpt::task::status
|
||||
:char
|
||||
{
|
||||
QUEUED = 'Q', ///< Queued for execution.
|
||||
RUNNING = 'R', ///< Currently being executed.
|
||||
ACCEPT = 'A', ///< Execution completed successfully.
|
||||
ERROR = 'E', ///< Execution did not complete successfully.
|
||||
};
|
67
ircd/gpt.cc
67
ircd/gpt.cc
|
@ -28,28 +28,22 @@ namespace ircd::gpt
|
|||
static void logits(float *, const float (&)[768], const model::decoder &);
|
||||
static void tail(float *, const float *, const model::decoder &);
|
||||
static u16 argmax(const float *, const opts &);
|
||||
|
||||
static vector_view<f32> embed(const vector_view<f32> &out, const u16 token, const u16 position);
|
||||
|
||||
std::unique_ptr<model::decoder> device
|
||||
{
|
||||
new model::decoder{}
|
||||
};
|
||||
static void embed(float *, const u16 token, const u16 position, const opts &);
|
||||
|
||||
static f32
|
||||
logit alignas(64) [65536],
|
||||
scratch alignas(64) [1024 * 768];
|
||||
}
|
||||
|
||||
decltype(ircd::gpt::default_opts)
|
||||
ircd::gpt::default_opts;
|
||||
|
||||
decltype(ircd::gpt::log)
|
||||
ircd::gpt::log
|
||||
{
|
||||
"gpt"
|
||||
};
|
||||
|
||||
decltype(ircd::gpt::default_opts)
|
||||
ircd::gpt::default_opts;
|
||||
|
||||
namespace ircd::gpt::model
|
||||
{
|
||||
constexpr float embed_pdrop
|
||||
|
@ -86,7 +80,8 @@ namespace ircd::gpt::model
|
|||
ircd::string_view
|
||||
ircd::gpt::generate(const mutable_buffer &out,
|
||||
const string_view &in,
|
||||
const opts &opts)
|
||||
const opts *opts,
|
||||
task *task)
|
||||
{
|
||||
u16 buf[2][256];
|
||||
const auto input_tokens
|
||||
|
@ -96,7 +91,7 @@ ircd::gpt::generate(const mutable_buffer &out,
|
|||
|
||||
const auto output_tokens
|
||||
{
|
||||
generate(buf[1], input_tokens, opts)
|
||||
generate(buf[1], input_tokens, opts, task)
|
||||
};
|
||||
|
||||
const auto output
|
||||
|
@ -110,12 +105,13 @@ ircd::gpt::generate(const mutable_buffer &out,
|
|||
ircd::vector_view<ircd::u16>
|
||||
ircd::gpt::generate(const vector_view<u16> &out,
|
||||
const vector_view<const u16> &in,
|
||||
const opts &opts)
|
||||
const opts *opts,
|
||||
task *task)
|
||||
{
|
||||
size_t ret(0);
|
||||
bool halt(false);
|
||||
uint errc[3] {0}, accc[3] {0};
|
||||
for(uint i(0); !halt && i < out.size() && ret < opts.limit; ++i)
|
||||
for(uint i(0); !halt && i < out.size() && ret < opts->limit; ++i)
|
||||
{
|
||||
const size_t tokens
|
||||
{
|
||||
|
@ -134,10 +130,7 @@ ircd::gpt::generate(const vector_view<u16> &out,
|
|||
data(scratch) + j * 768, 768
|
||||
};
|
||||
|
||||
const auto embedding
|
||||
{
|
||||
embed(dst, in[j], j)
|
||||
};
|
||||
embed(data(dst), in[j], j, *opts);
|
||||
}
|
||||
|
||||
for(uint j(0); j < ret; ++j)
|
||||
|
@ -147,32 +140,29 @@ ircd::gpt::generate(const vector_view<u16> &out,
|
|||
data(scratch) + (in.size() + j) * 768, 768
|
||||
};
|
||||
|
||||
const auto embedding
|
||||
{
|
||||
embed(dst, out[j], in.size() + j)
|
||||
};
|
||||
embed(data(dst), out[j], in.size() + j, *opts);
|
||||
}
|
||||
|
||||
transform(data(scratch), tokens, *device);
|
||||
transform(data(scratch), tokens, *opts->model);
|
||||
|
||||
const vector_view<f32> last_embed
|
||||
{
|
||||
data(scratch) + ((tokens - 1) * 768), 768
|
||||
};
|
||||
|
||||
tail(logit, data(last_embed), *device);
|
||||
out[i] = argmax(logit, opts);
|
||||
tail(logit, data(last_embed), *opts->model);
|
||||
out[i] = argmax(logit, *opts);
|
||||
|
||||
for(uint j(0); j < 3; ++j)
|
||||
{
|
||||
errc[j] = out[i] == opts.error_code[j][errc[j]]? errc[j] + 1: 0;
|
||||
accc[j] = out[i] == opts.accept_code[j][accc[j]]? accc[j] + 1: 0;
|
||||
errc[j] = out[i] == opts->error_code[j][errc[j]]? errc[j] + 1: 0;
|
||||
accc[j] = out[i] == opts->accept_code[j][accc[j]]? accc[j] + 1: 0;
|
||||
}
|
||||
|
||||
for(uint j(0); j < 3; ++j)
|
||||
{
|
||||
halt |= errc[j] >= 3 || (errc[j] && opts.error_code[j][errc[j] + 1] == -1U);
|
||||
halt |= accc[j] >= 3 || (accc[j] && opts.accept_code[j][accc[j] + 1] == -1U);
|
||||
halt |= errc[j] >= 3 || (errc[j] && opts->error_code[j][errc[j] + 1] == -1U);
|
||||
halt |= accc[j] >= 3 || (accc[j] && opts->accept_code[j][accc[j] + 1] == -1U);
|
||||
}
|
||||
|
||||
++ret;
|
||||
|
@ -184,30 +174,25 @@ ircd::gpt::generate(const vector_view<u16> &out,
|
|||
};
|
||||
}
|
||||
|
||||
ircd::vector_view<ircd::f32>
|
||||
ircd::gpt::embed(const vector_view<f32> &out,
|
||||
void
|
||||
ircd::gpt::embed(float *const out,
|
||||
const u16 token,
|
||||
const u16 position)
|
||||
const u16 position,
|
||||
const opts &opts)
|
||||
{
|
||||
assert(device);
|
||||
|
||||
assert(opts.model);
|
||||
const auto &wpe
|
||||
{
|
||||
device->wpe[position]
|
||||
opts.model->wpe[position]
|
||||
};
|
||||
|
||||
const auto &wte
|
||||
{
|
||||
device->wte[token]
|
||||
opts.model->wte[token]
|
||||
};
|
||||
|
||||
for(uint j(0); j < 768; ++j)
|
||||
out[j] = wte[j] + wpe[j];
|
||||
|
||||
return vector_view<f32>
|
||||
{
|
||||
data(out), 768
|
||||
};
|
||||
}
|
||||
|
||||
uint16_t
|
||||
|
|
|
@ -10,6 +10,9 @@
|
|||
|
||||
namespace ircd::gpt::model
|
||||
{
|
||||
using init_func = void (*)(decoder &, const string_view &, const size_t &, const json::array &);
|
||||
using init_handler = std::pair<string_view, init_func>;
|
||||
|
||||
static void
|
||||
init_f_weight(decoder &, const string_view &, const size_t &, const json::array &),
|
||||
init_f_bias(decoder &, const string_view &, const size_t &, const json::array &),
|
||||
|
@ -27,18 +30,23 @@ namespace ircd::gpt::model
|
|||
init_h_attn_attn_bias(decoder &, const string_view &, const size_t &, const json::array &),
|
||||
init_h_attn_proj_weight(decoder &, const string_view &, const size_t &, const json::array &),
|
||||
init_h_attn_proj_bias(decoder &, const string_view &, const size_t &, const json::array &),
|
||||
init_h_attn_bias(decoder &, const string_view &, const size_t &, const json::array &),
|
||||
init() noexcept;
|
||||
init_h_attn_bias(decoder &, const string_view &, const size_t &, const json::array &);
|
||||
|
||||
extern conf::item<std::string> path;
|
||||
extern const std::pair
|
||||
<
|
||||
string_view,
|
||||
void (*)(decoder &, const string_view &, const size_t &, const json::array &)
|
||||
>
|
||||
static bool init_from_cache(const string_view &);
|
||||
static void init_from_json_handle(decoder &, const init_handler &, const size_t &);
|
||||
static void init_from_json(const string_view &, const string_view &);
|
||||
static void init() noexcept;
|
||||
|
||||
extern const init_handler
|
||||
manifest[],
|
||||
manifest_h[],
|
||||
manifest_td[];
|
||||
|
||||
extern conf::item<std::string> path;
|
||||
extern conf::item<std::string> cache_path;
|
||||
|
||||
static fs::map default_model_shm;
|
||||
static std::unique_ptr<decoder> default_model_res;
|
||||
}
|
||||
|
||||
decltype(ircd::gpt::model::manifest_h)
|
||||
|
@ -76,6 +84,13 @@ ircd::gpt::model::manifest_td
|
|||
{ "train.jsonl", nullptr, },
|
||||
};
|
||||
|
||||
/// Configuration item holding the path of the cached model file. init()
/// passes it to init_from_cache(), and to init_from_json() when no usable
/// cache is found. The default value is a bare filename.
decltype(ircd::gpt::model::cache_path)
|
||||
ircd::gpt::model::cache_path
|
||||
{
|
||||
{ "name", "ircd.gpt.model.cache.path" },
|
||||
{ "default", "model.cache.localhost" },
|
||||
};
|
||||
|
||||
decltype(ircd::gpt::model::path)
|
||||
ircd::gpt::model::path
|
||||
{
|
||||
|
@ -86,11 +101,8 @@ ircd::gpt::model::path
|
|||
init
|
||||
};
|
||||
|
||||
//TODO: XXX
|
||||
namespace ircd::gpt
|
||||
{
|
||||
extern const std::unique_ptr<model::decoder> device;
|
||||
}
|
||||
/// Pointer to the default model. init_from_cache() points it at the data of
/// the mapped cache file; init_from_json() points it at the decoder owned by
/// default_model_res.
decltype(ircd::gpt::model::default_model)
|
||||
ircd::gpt::model::default_model;
|
||||
|
||||
void
|
||||
ircd::gpt::model::init()
|
||||
|
@ -99,93 +111,164 @@ noexcept
|
|||
if(!model::path)
|
||||
return;
|
||||
|
||||
const size_t layers
|
||||
if(!init_from_cache(model::cache_path))
|
||||
init_from_json(model::cache_path, model::path);
|
||||
}
|
||||
|
||||
bool
|
||||
ircd::gpt::model::init_from_cache(const string_view &cache_path)
|
||||
{
|
||||
if(!fs::is_reg(cache_path))
|
||||
return false;
|
||||
|
||||
const auto size
|
||||
{
|
||||
12
|
||||
fs::size(cache_path)
|
||||
};
|
||||
|
||||
const auto handle{[]
|
||||
(const auto &a, const auto &b, const auto &i)
|
||||
if(unlikely(size != sizeof(model::decoder)))
|
||||
throw error
|
||||
{
|
||||
"Cached model `%s' size %zu differs from %zu.",
|
||||
cache_path,
|
||||
size,
|
||||
sizeof(model::decoder),
|
||||
};
|
||||
|
||||
const fs::fd fd
|
||||
{
|
||||
const auto &[fmt, handler]
|
||||
{
|
||||
a[b]
|
||||
};
|
||||
cache_path
|
||||
};
|
||||
|
||||
char namebuf[128] {0};
|
||||
const string_view path_part[2]
|
||||
{
|
||||
model::path, fmt::sprintf
|
||||
{
|
||||
namebuf, fmt, i
|
||||
}
|
||||
};
|
||||
|
||||
const fs::fd fd
|
||||
{
|
||||
fs::path(fs::path_scratch, path_part)
|
||||
};
|
||||
|
||||
fs::map::opts map_opts;
|
||||
const fs::map map
|
||||
{
|
||||
fd, map_opts
|
||||
};
|
||||
|
||||
const json::array mat
|
||||
{
|
||||
map
|
||||
};
|
||||
|
||||
assert(gpt::device);
|
||||
handler(*gpt::device, path_part[1], i, mat);
|
||||
log::logf
|
||||
{
|
||||
log, log::level::DEBUG,
|
||||
"Model init [%2d][%2d] :%s",
|
||||
i,
|
||||
b,
|
||||
path_part[1],
|
||||
};
|
||||
}};
|
||||
|
||||
ircd::timer sw;
|
||||
size_t read(0), wrote(0);
|
||||
if(fs::exists("model"))
|
||||
fs::map::opts map_opts;
|
||||
default_model_shm = fs::map
|
||||
{
|
||||
const auto _read
|
||||
{
|
||||
fs::read(fs::fd{"model"}, mutable_buffer{(char *)(gpt::device.get()), sizeof(model::decoder)})
|
||||
};
|
||||
fd, map_opts, sizeof(decoder)
|
||||
};
|
||||
|
||||
read = size(_read);
|
||||
} else {
|
||||
memset(device.get(), 0x0, sizeof(model::decoder));
|
||||
default_model = reinterpret_cast<decoder *>
|
||||
(
|
||||
data(default_model_shm)
|
||||
);
|
||||
|
||||
handle(manifest, 0, 0);
|
||||
handle(manifest, 1, 0);
|
||||
handle(manifest, 2, 0);
|
||||
handle(manifest, 3, 0);
|
||||
for(size_t i(0); i < layers; ++i)
|
||||
for(size_t j(0); j < 13; ++j)
|
||||
handle(manifest_h, j, i);
|
||||
|
||||
const auto _wrote
|
||||
{
|
||||
fs::write("model", const_buffer{(const char *)(gpt::device.get()), sizeof(model::decoder)})
|
||||
};
|
||||
|
||||
wrote = size(_wrote);
|
||||
}
|
||||
|
||||
char pbuf[3][48];
|
||||
log::logf
|
||||
char pbuf[48];
|
||||
log::info
|
||||
{
|
||||
log, log::level::DEBUG,
|
||||
"Model init completed in %s read %s wrote %s",
|
||||
sw.pretty(pbuf[0]),
|
||||
pretty(pbuf[1], iec(size(read))),
|
||||
pretty(pbuf[2], iec(size(wrote))),
|
||||
log, "model(%p) mapped cached model `%s' %s",
|
||||
data(default_model_shm),
|
||||
cache_path,
|
||||
pretty(pbuf, iec(size)),
|
||||
};
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
ircd::gpt::model::init_from_json(const string_view &cache_path,
|
||||
const string_view &model_path)
|
||||
{
|
||||
util::timer stopwatch;
|
||||
auto decoder
|
||||
{
|
||||
std::make_unique<model::decoder>()
|
||||
};
|
||||
|
||||
// Load the top level files, vocab etc
|
||||
for(size_t i(0); i < 4; ++i)
|
||||
init_from_json_handle(*decoder, manifest[i], 0);
|
||||
|
||||
// Load the transformer files by layer
|
||||
const size_t layers {12};
|
||||
for(size_t i(0); i < layers; ++i)
|
||||
for(size_t j(0); j < 13; ++j)
|
||||
init_from_json_handle(*decoder, manifest_h[j], i);
|
||||
|
||||
const const_buffer src
|
||||
{
|
||||
reinterpret_cast<char *>(decoder.get()), sizeof(model::decoder)
|
||||
};
|
||||
|
||||
const auto wrote
|
||||
{
|
||||
fs::write(cache_path, src)
|
||||
};
|
||||
|
||||
char pbuf[2][48];
|
||||
log::info
|
||||
{
|
||||
log, "model(%p) parsed `%s' cached %s to `%s' in %s",
|
||||
decoder.get(),
|
||||
model_path,
|
||||
pretty(pbuf[0], iec(size(wrote))),
|
||||
cache_path,
|
||||
stopwatch.pretty(pbuf[1]),
|
||||
};
|
||||
|
||||
default_model_res = std::move(decoder);
|
||||
default_model = default_model_res.get();
|
||||
}
|
||||
|
||||
void
|
||||
ircd::gpt::model::init_from_json_handle(decoder &d,
|
||||
const init_handler &handler,
|
||||
const size_t &layer)
|
||||
{
|
||||
const auto &[fmt, func]
|
||||
{
|
||||
handler
|
||||
};
|
||||
|
||||
char namebuf[128];
|
||||
const string_view path_part[2]
|
||||
{
|
||||
model::path, fmt::sprintf
|
||||
{
|
||||
namebuf, fmt, layer
|
||||
}
|
||||
};
|
||||
|
||||
const auto path
|
||||
{
|
||||
fs::path(fs::path_scratch, path_part)
|
||||
};
|
||||
|
||||
fs::fd::opts fdopts;
|
||||
fdopts.sequential = true;
|
||||
const fs::fd fd
|
||||
{
|
||||
path, fdopts
|
||||
};
|
||||
|
||||
// mmap of the file
|
||||
const fs::map map
|
||||
{
|
||||
fd
|
||||
};
|
||||
|
||||
// Each file is a JSON array at the top level.
|
||||
const json::array matrix
|
||||
{
|
||||
map
|
||||
};
|
||||
|
||||
// Readable name for logging
|
||||
const auto &name
|
||||
{
|
||||
path_part[1]
|
||||
};
|
||||
|
||||
if(likely(func))
|
||||
func(d, name, layer, matrix);
|
||||
|
||||
// Check for interrupt after long operation
|
||||
ctx::interruption_point();
|
||||
|
||||
log::info
|
||||
{
|
||||
log, "model(%p) loaded layer:%zu :%s",
|
||||
&d,
|
||||
layer,
|
||||
name,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue