mirror of
https://github.com/matrix-construct/construct
synced 2025-01-13 16:33:53 +01:00
ircd::gpt: Reorganize task options and control blocks.
This commit is contained in:
parent
3e9c2d1b56
commit
37b1d47c8d
12 changed files with 527 additions and 643 deletions
|
@ -17,8 +17,8 @@ namespace ircd::gpt
|
|||
{
|
||||
IRCD_EXCEPTION(ircd::error, error)
|
||||
|
||||
struct opts;
|
||||
struct task;
|
||||
struct gate;
|
||||
|
||||
extern log::log log;
|
||||
}
|
||||
|
@ -27,7 +27,6 @@ namespace ircd::gpt
|
|||
#include "vocab.h"
|
||||
#include "model.h"
|
||||
#include "token.h"
|
||||
#include "opts.h"
|
||||
#include "task.h"
|
||||
#include "task/task.h"
|
||||
#include "pipe/pipe.h"
|
||||
#include "generate.h"
|
||||
|
|
|
@ -1,277 +0,0 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_GPT_OPTS_H
|
||||
|
||||
/// Task Options Page
|
||||
///
|
||||
/// The option block is directly shared with task software as constant data.
|
||||
/// This structure and its mutable companion in `task.h` determine the outcome
|
||||
/// of the next execution cycle; options are immutable to device software but
|
||||
/// may be changed by the host between execution cycles if desired.
|
||||
///
|
||||
struct ircd_gpt_opts
|
||||
{
|
||||
/// Specifies the nominal halting condition based on a sequence of tokens.
|
||||
/// Generation will complete with success after one of these sequences is
|
||||
/// witnessed. Set tokens to -1 starting from the back for shorter
|
||||
/// sequences; zero-length sequences (all -1's) are never matched.
|
||||
uint accept_code[4][4]
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
{ 13, 198, -1U, -1U, },
|
||||
{ 198, 198, -1U, -1U, },
|
||||
{ -1U, -1U, -1U, -1U, },
|
||||
{ -1U, -1U, -1U, -1U, },
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// Specifies the exceptional halting condition based on the sequence of
|
||||
/// tokens. By default, the three zeros represent three outputs of '!'
|
||||
/// which is probably an error; note that a true "!!!" is represented by
|
||||
/// token number 10185. Set tokens to -1 starting from the back to not
|
||||
/// match that token; generated output after errors is usually garbage.
|
||||
uint error_code[4][4]
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
{ 0, 0, 0, -1U, },
|
||||
{ -1U, -1U, -1U, -1U, },
|
||||
{ -1U, -1U, -1U, -1U, },
|
||||
{ -1U, -1U, -1U, -1U, },
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// Limit number of output tokens. Default of -1 is unlimited; the number
|
||||
/// of tokens generated will be limited by other factors.
|
||||
uint limit
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
1
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// Flip random coins over the top k logits each round. Setting to 1
|
||||
/// deterministically selects the top logit.
|
||||
uint top_k
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
2
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// Specifies the token context size in tokens.
|
||||
uint context_tokens
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
1024
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// Specifies the token buffer size in tokens.
|
||||
uint buffer_tokens
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
1024
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// Embedding vector elements
|
||||
uint embed_elems
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
768
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// Attention unit fcon width multiple
|
||||
uint attn_mult
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
3U
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// MLP unit fcon width multiple
|
||||
uint ffnn_mult
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
4U
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// Attention unit width multiple
|
||||
uint attn_elems
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
embed_elems * attn_mult
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// FFNN unit width multiple
|
||||
uint ffnn_elems
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
embed_elems * ffnn_mult
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// SIMD lane count
|
||||
uint lanes
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
4U
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
uint embed_width
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
embed_elems / lanes
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
uint attn_width
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
attn_elems / lanes
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
uint attn_height
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
embed_elems / lanes
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
uint ffnn_width
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
ffnn_elems / lanes
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
uint ffnn_height
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
embed_elems / lanes
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// Number of possible target n-grams.
|
||||
uint logits
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
50257
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// Seed for the task's PRNG.
|
||||
ulong seed
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
1234567890UL
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// Training steps
|
||||
ulong training_steps
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
250000
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
/// Validation steps
|
||||
ulong validation_steps
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
5000
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
ushort label
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
198
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
float alpha
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
0.001
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
float beta[2]
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
0.9, // Beta1
|
||||
0.999, // Beta2
|
||||
}
|
||||
#endif
|
||||
;
|
||||
|
||||
float epsilon
|
||||
#ifdef __cplusplus
|
||||
{
|
||||
0.000001
|
||||
}
|
||||
#endif
|
||||
;
|
||||
}
|
||||
__attribute__((aligned(4096)));
|
||||
|
||||
#ifdef __cplusplus
|
||||
/// Generator Task Options.
|
||||
///
|
||||
/// Parameters for a task. Options are constant and one instance can be shared
|
||||
/// between multiple task instances. This structure extends the task options
|
||||
/// page, starting a new page which is not visible to device software; C++ and
|
||||
/// host pointers are available.
|
||||
///
|
||||
struct ircd::gpt::opts
|
||||
:ircd_gpt_opts
|
||||
{
|
||||
/// Pointer to the model
|
||||
const model::decoder *model
|
||||
{
|
||||
model::default_model
|
||||
};
|
||||
};
|
||||
|
||||
static_assert(sizeof(struct ircd_gpt_opts) == 4096);
|
||||
static_assert(std::is_standard_layout<struct ircd_gpt_opts>::value);
|
||||
#endif
|
|
@ -1,166 +0,0 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_GPT_TASK_H
|
||||
|
||||
/// Task Control Page
|
||||
///
|
||||
/// The control block is shared with our device software. Execution state is
|
||||
/// maintained in the task control block across cycles. The control block is
|
||||
/// the mutable state component for an execution; for the immutable component
|
||||
/// also shared with device software see opts.h.
|
||||
///
|
||||
struct ircd_gpt_task
|
||||
{
|
||||
/// Header magic 0xC7012C70
|
||||
uint magic;
|
||||
|
||||
/// Hypercall code set by our device software upon completion and control
|
||||
/// transfer back to the host. Negative codes indicate errors, positive
|
||||
/// codes are used for status and/or procedure calls; zero is also an error.
|
||||
enum ircd_gpt_hypercall call;
|
||||
|
||||
/// Token ring head. Tokens in the ring extend behind the head for
|
||||
/// `tokens`. The `head` value is automatically modulated by device
|
||||
/// software to wrap around the ring.
|
||||
uint head;
|
||||
|
||||
/// Token counter. The counter indicates the number of valid tokens in
|
||||
/// the context buffer. This value must not exceed the buffer size.
|
||||
uint tokens;
|
||||
|
||||
/// Accumulates the number of task cycles. The cycle counter is incremented
|
||||
/// by device software after each repetition of the kernel pipeline to
|
||||
/// produce one additional token.
|
||||
ulong cycle;
|
||||
|
||||
/// Accumulates the epoch count for the task. The counter is incremented
|
||||
/// by one in device software before control returns back to the host.
|
||||
/// Several cycles may occur during each epoch.
|
||||
ulong epoch;
|
||||
|
||||
/// Accumulates the training epoch count for the task. The counter is
|
||||
/// incremented by one in device software for each backward propagation.
|
||||
ulong step;
|
||||
|
||||
/// Accumulates the number of tokens produced by the task. Several tokens
|
||||
/// may be produced each epoch, but currently only one token is produced
|
||||
/// each cycle.
|
||||
ulong produced;
|
||||
|
||||
/// Accumulates the number of tokens witnessed by the task. The number of
|
||||
/// tokens in the context for each cycle is counted as witnessed.
|
||||
ulong witnessed;
|
||||
|
||||
/// Accumulates time in microseconds elapsed for the task.
|
||||
ulong elapsed;
|
||||
|
||||
/// PRNG xoshiro256 state. This is the de facto random seed which can be
|
||||
/// set before cycle entry by the host. It is updated by device software
|
||||
/// when used.
|
||||
ulong rand[4];
|
||||
|
||||
/// Updated by the host with the value of the timestamp register as sampled
|
||||
/// immediately before each transfer of control to the device.
|
||||
ulong host_tsc;
|
||||
|
||||
/// State counters for the accept/error sequence codes.
|
||||
uint accept_seq[4], error_seq[4];
|
||||
|
||||
/// Logit softmax mu
|
||||
float samax_mu;
|
||||
|
||||
/// Logit softmax sum
|
||||
float samax_sum;
|
||||
|
||||
/// Logit softmax lambda
|
||||
float samax_lambda;
|
||||
|
||||
/// Loss for last token of last cycle
|
||||
float loss;
|
||||
|
||||
/// Sum loss over all cycles
|
||||
float loss_sum[4];
|
||||
|
||||
/// Average loss over all cycles
|
||||
float loss_mean;
|
||||
|
||||
/// Perplexity score for last token of last cycle
|
||||
float perp;
|
||||
|
||||
/// Sum ppl over all cycles
|
||||
float perp_sum[4];
|
||||
|
||||
/// Perplexity mean over context
|
||||
float perp_mean;
|
||||
|
||||
/// Certainty difference score for last token of last cycle
|
||||
float cert;
|
||||
|
||||
/// Sum certainty over all cycles
|
||||
float cert_sum[4];
|
||||
|
||||
/// Certainty mean over context
|
||||
float cert_mean;
|
||||
|
||||
/// Final loss
|
||||
float l2_loss;
|
||||
|
||||
/// Final loss mean
|
||||
float l2_loss_mean;
|
||||
|
||||
/// Perform backprop
|
||||
bool prop;
|
||||
|
||||
/// The token buffer starts at offset 2048 and continues to the end of
|
||||
/// the page; options specify the size of the tokens buffer in tokens.
|
||||
/// Additional pages must be attached for larger buffer sizes.
|
||||
ushort token[] __attribute__((aligned(2048)));
|
||||
}
|
||||
__attribute__((aligned(4096)));
|
||||
|
||||
#ifdef __cplusplus
|
||||
/// Task Context
|
||||
///
|
||||
/// State for a task.
|
||||
struct ircd::gpt::task
|
||||
{
|
||||
enum status :char;
|
||||
|
||||
/// Reference to the attached options.
|
||||
const gpt::opts *opts {nullptr};
|
||||
|
||||
/// Reference to control pages.
|
||||
struct ircd_gpt_task *ctrl {nullptr};
|
||||
|
||||
/// Current task status.
|
||||
enum status status {'\0'};
|
||||
|
||||
task(const gpt::opts * = nullptr,
|
||||
struct ircd_gpt_task * = nullptr);
|
||||
|
||||
~task() noexcept;
|
||||
};
|
||||
|
||||
/// The current status of a task is indicated with intelligible characters
|
||||
enum ircd::gpt::task::status
|
||||
:char
|
||||
{
|
||||
QUEUED = 'Q', ///< Queued for execution.
|
||||
RUNNING = 'R', ///< Currently being executed.
|
||||
ACCEPT = 'A', ///< Execution completed successfully.
|
||||
ERROR = 'E', ///< Execution did not complete successfully.
|
||||
};
|
||||
|
||||
static_assert(sizeof(struct ircd_gpt_task) == 4096);
|
||||
static_assert(offsetof(struct ircd_gpt_task, token) == 2048);
|
||||
static_assert(std::is_standard_layout<struct ircd_gpt_task>::value);
|
||||
#endif
|
58
include/ircd/gpt/task/ctrl.h
Normal file
58
include/ircd/gpt/task/ctrl.h
Normal file
|
@ -0,0 +1,58 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_GPT_TASK_CTRL_H
|
||||
|
||||
/// Task Control Page
|
||||
///
|
||||
/// The control block is shared with our device software. Execution state is
|
||||
/// maintained in the task control block across cycles. The control block is
|
||||
/// the mutable state component for an execution; for the immutable component
|
||||
/// also shared with device software see opts.h.
|
||||
///
|
||||
struct ircd_gpt_task
|
||||
{
|
||||
/// Epoch counting & interrupt control block.
|
||||
struct ircd_gpt_task_epic epic;
|
||||
|
||||
/// Token context control block. Contains state for the token context
|
||||
/// buffer; the buffer with the tokens themselves is elsewhere.
|
||||
struct ircd_gpt_task_tokens tokens;
|
||||
|
||||
/// Logit softmax state
|
||||
struct ircd_math_samax samax;
|
||||
|
||||
/// Target label loss state
|
||||
struct ircd_math_mean loss;
|
||||
|
||||
/// Target label perplexity score state
|
||||
struct ircd_math_mean perp;
|
||||
|
||||
/// Target label certainty difference state
|
||||
struct ircd_math_mean cert;
|
||||
|
||||
/// PRNG xoshiro256 state. This is the de facto random seed which can be
|
||||
/// set before cycle entry by the host. It is updated by device software
|
||||
/// when used.
|
||||
ulong rand[4];
|
||||
|
||||
/// Perform backprop
|
||||
bool prop;
|
||||
|
||||
/// Header magic 0xC7012C70
|
||||
uint magic;
|
||||
|
||||
/// The token buffer starts at offset 2048 and continues to the end of
|
||||
/// the page; options specify the size of the tokens buffer in tokens.
|
||||
/// Additional pages must be attached for larger buffer sizes.
|
||||
ushort token[] __attribute__((aligned(2048)));
|
||||
}
|
||||
__attribute__((aligned(4096)));
|
38
include/ircd/gpt/task/epic.h
Normal file
38
include/ircd/gpt/task/epic.h
Normal file
|
@ -0,0 +1,38 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_GPT_TASK_EPIC_H
|
||||
|
||||
/// Epoch Precision Interrupt Controller
|
||||
///
|
||||
struct ircd_gpt_task_epic
|
||||
{
|
||||
/// Accumulates the number of task cycles. The cycle counter is incremented
|
||||
/// by device software after each repetition of the kernel pipeline to
|
||||
/// produce one additional token.
|
||||
ulong cycle;
|
||||
|
||||
/// Accumulates the epoch count for the task. The counter is incremented
|
||||
/// by one in device software before control returns back to the host.
|
||||
/// Several cycles may occur during each epoch.
|
||||
ulong epoch;
|
||||
|
||||
/// Accumulates the training epoch count for the task. The counter is
|
||||
/// incremented by one in device software for each backward propagation.
|
||||
ulong step;
|
||||
|
||||
/// Updated by the host with the value of the timestamp register as sampled
|
||||
/// immediately before each transfer of control to the device.
|
||||
ulong host_tsc;
|
||||
|
||||
/// Accumulates time in microseconds elapsed for the task.
|
||||
ulong elapsed;
|
||||
};
|
30
include/ircd/gpt/task/gate.h
Normal file
30
include/ircd/gpt/task/gate.h
Normal file
|
@ -0,0 +1,30 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_GPT_GATE_H
|
||||
|
||||
/// Task Gate Descriptor
|
||||
///
|
||||
struct ircd_gpt_gate
|
||||
{
|
||||
ushort code[8];
|
||||
}
|
||||
__attribute__((aligned(16)));
|
||||
|
||||
#ifdef __cplusplus
|
||||
struct ircd::gpt::gate
|
||||
:ircd_gpt_gate
|
||||
{
|
||||
gate()
|
||||
:ircd_gpt_gate{0}
|
||||
{}
|
||||
};
|
||||
#endif
|
123
include/ircd/gpt/task/opts.h
Normal file
123
include/ircd/gpt/task/opts.h
Normal file
|
@ -0,0 +1,123 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_GPT_OPTS_H
|
||||
|
||||
/// Task Options Page
|
||||
///
|
||||
/// The option block is directly shared with task software as constant data.
|
||||
/// This structure and its mutable companion in `task.h` determine the outcome
|
||||
/// of the next execution cycle; options are immutable to device software but
|
||||
/// may be changed by the host between execution cycles if desired.
|
||||
///
|
||||
struct ircd_gpt_opts
|
||||
{
|
||||
#ifdef __cplusplus
|
||||
ircd_gpt_opts(const ircd::gpt::model::decoder * = nullptr) noexcept;
|
||||
#endif
|
||||
|
||||
/// Reference to the model (currently not available in device software).
|
||||
#ifndef __cplusplus
|
||||
const intptr_t model;
|
||||
#else
|
||||
const ircd::gpt::model::decoder *model;
|
||||
#endif
|
||||
|
||||
/// Limit number of output tokens. Default of -1 is unlimited; the number
|
||||
/// of tokens generated will be limited by other factors.
|
||||
uint limit;
|
||||
|
||||
/// Flip random coins over the top k logits each round. Setting to 1
|
||||
/// deterministically selects the top logit.
|
||||
uint top_k;
|
||||
|
||||
/// Specifies the token context size in tokens.
|
||||
uint context_tokens;
|
||||
|
||||
/// Specifies the token buffer size in tokens.
|
||||
uint buffer_tokens;
|
||||
|
||||
/// Embedding vector elements
|
||||
uint embed_elems;
|
||||
|
||||
/// Attention unit fcon width multiple
|
||||
uint attn_mult;
|
||||
|
||||
/// MLP unit fcon width multiple
|
||||
uint ffnn_mult;
|
||||
|
||||
/// (computed) attention unit width multiple
|
||||
uint attn_elems;
|
||||
|
||||
/// (computed) FFNN unit width multiple
|
||||
uint ffnn_elems;
|
||||
|
||||
/// SIMD lane count
|
||||
uint lanes;
|
||||
|
||||
/// (computed) `embed_elems` / `lanes`
|
||||
uint embed_width;
|
||||
|
||||
/// (computed) Attention unit X dimension
|
||||
uint attn_width;
|
||||
|
||||
/// (computed) Attention unit Y dimension
|
||||
uint attn_height;
|
||||
|
||||
/// (computed) MLP backend X dimension
|
||||
uint ffnn_width;
|
||||
|
||||
/// (computed) MLP backend Y dimension
|
||||
uint ffnn_height;
|
||||
|
||||
/// Number of possible target n-grams.
|
||||
uint logits;
|
||||
|
||||
/// Seed for the task's PRNG.
|
||||
ulong seed;
|
||||
|
||||
/// Training steps
|
||||
ulong training_steps;
|
||||
|
||||
/// Validation steps
|
||||
ulong validation_steps;
|
||||
|
||||
/// Target label
|
||||
ushort label;
|
||||
|
||||
/// Learning rate
|
||||
float alpha;
|
||||
|
||||
/// Decay rate
|
||||
float beta[2];
|
||||
|
||||
/// Denorm smoothing
|
||||
float epsilon;
|
||||
|
||||
/// Number of gate descriptors attached to this page.
|
||||
uint gates;
|
||||
|
||||
/// The gate descriptor table starts at offset 2048 and continues to the
|
||||
/// end of the page. For more descriptors additional pages must be
|
||||
/// attached.
|
||||
struct ircd_gpt_gate gate[] __attribute__((aligned(2048)));
|
||||
}
|
||||
__attribute__((aligned(4096)));
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace ircd::gpt
|
||||
{
|
||||
using opts = struct ircd_gpt_opts;
|
||||
}
|
||||
|
||||
static_assert(sizeof(struct ircd_gpt_opts) == 4096);
|
||||
static_assert(std::is_standard_layout<struct ircd_gpt_opts>::value);
|
||||
#endif
|
56
include/ircd/gpt/task/task.h
Normal file
56
include/ircd/gpt/task/task.h
Normal file
|
@ -0,0 +1,56 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_GPT_TASK_H
|
||||
|
||||
#include "epic.h"
|
||||
#include "tokens.h"
|
||||
#include "gate.h"
|
||||
#include "opts.h"
|
||||
#include "ctrl.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
/// Task Context
|
||||
///
|
||||
/// State for a task.
|
||||
struct ircd::gpt::task
|
||||
{
|
||||
enum status :char;
|
||||
|
||||
/// Reference to the attached options.
|
||||
const gpt::opts *opts {nullptr};
|
||||
|
||||
/// Reference to control pages.
|
||||
struct ircd_gpt_task *ctrl {nullptr};
|
||||
|
||||
/// Current task status.
|
||||
enum status status {'\0'};
|
||||
|
||||
task(const gpt::opts * = nullptr,
|
||||
struct ircd_gpt_task * = nullptr);
|
||||
|
||||
~task() noexcept;
|
||||
};
|
||||
|
||||
/// The current status of a task is indicated with intelligible characters
|
||||
enum ircd::gpt::task::status
|
||||
:char
|
||||
{
|
||||
QUEUED = 'Q', ///< Queued for execution.
|
||||
RUNNING = 'R', ///< Currently being executed.
|
||||
ACCEPT = 'A', ///< Execution completed successfully.
|
||||
ERROR = 'E', ///< Execution did not complete successfully.
|
||||
};
|
||||
|
||||
static_assert(sizeof(struct ircd_gpt_task) == 4096);
|
||||
static_assert(offsetof(struct ircd_gpt_task, token) == 2048);
|
||||
static_assert(std::is_standard_layout<struct ircd_gpt_task>::value);
|
||||
#endif
|
35
include/ircd/gpt/task/tokens.h
Normal file
35
include/ircd/gpt/task/tokens.h
Normal file
|
@ -0,0 +1,35 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_GPT_TASK_TOKENS_H
|
||||
|
||||
/// Token Context Buffer (Control Block)
|
||||
///
|
||||
struct ircd_gpt_task_tokens
|
||||
{
|
||||
/// Token ring head. Tokens in the ring extend behind the head for
|
||||
/// `tokens`. The `head` value is automatically modulated by device
|
||||
/// software to wrap around the ring.
|
||||
uint head;
|
||||
|
||||
/// Token counter. The counter indicates the number of valid tokens in
|
||||
/// the context buffer. This value must not exceed the buffer size.
|
||||
uint count;
|
||||
|
||||
/// Accumulates the number of tokens produced by the task. Several tokens
|
||||
/// may be produced each epoch, but currently only one token is produced
|
||||
/// each cycle.
|
||||
ulong produced;
|
||||
|
||||
/// Accumulates the number of tokens witnessed by the task. The number of
|
||||
/// tokens in the context for each cycle is counted as witnessed.
|
||||
ulong witnessed;
|
||||
};
|
309
ircd/gpt.cc
309
ircd/gpt.cc
|
@ -88,67 +88,29 @@ ircd::gpt::generate(const vector_view<u16> &out,
|
|||
|
||||
const auto &opts(*task.opts);
|
||||
auto &ctrl(*task.ctrl);
|
||||
auto &errc(ctrl.error_seq);
|
||||
auto &accc(ctrl.accept_seq);
|
||||
ctrl.tokens = in.size();
|
||||
ctrl.head = 0;
|
||||
|
||||
const size_t tmax
|
||||
{
|
||||
in.size() + opts.limit
|
||||
};
|
||||
|
||||
const vector_view<f32> accum
|
||||
{
|
||||
gpt::scratch, tmax * 768
|
||||
};
|
||||
|
||||
const vector_view<f32> embeds
|
||||
{
|
||||
gpt::embeds, tmax * 768
|
||||
};
|
||||
ctrl.tokens.count = 0;
|
||||
ctrl.tokens.head = 0;
|
||||
|
||||
for(uint j(0); j < in.size(); ++j)
|
||||
ctrl.token[ctrl.tokens.count++] = in[j];
|
||||
|
||||
for(uint i(0); i < opts.gates; ++i)
|
||||
for(uint k(0); k < 8; ++k)
|
||||
{
|
||||
if(ctrl.tokens.count >= opts.buffer_tokens)
|
||||
break;
|
||||
|
||||
if(opts.gate[i].code[k] == 0)
|
||||
break;
|
||||
|
||||
ctrl.token[ctrl.tokens.count] = opts.gate[i].code[k];
|
||||
ctrl.tokens.count++;
|
||||
}
|
||||
|
||||
const size_t in_size
|
||||
{
|
||||
const vector_view<f32> dst
|
||||
{
|
||||
data(embeds) + j * 768, 768
|
||||
};
|
||||
|
||||
if(ircd::cl::enable)
|
||||
ctrl.token[j] = in[j];
|
||||
else
|
||||
embed(data(dst), in[j], j, opts);
|
||||
|
||||
#if 0 // RB_DEBUG
|
||||
static char dbuf[512] {0};
|
||||
char report[1536] {0};
|
||||
char tmbuf[1][64] {{0}};
|
||||
const size_t report_size = snprintf
|
||||
(
|
||||
report, sizeof(report),
|
||||
"%-4u %4u %4u:%-4u %1u%1u [ %6.2fL %6.2f%% ] %6.2fL %5.1f%% %s",
|
||||
ctrl.epoch,
|
||||
ctrl.cycle,
|
||||
j,
|
||||
ctrl.tokens,
|
||||
0,
|
||||
0,
|
||||
0.0,
|
||||
0.0,
|
||||
0.0,
|
||||
0.0,
|
||||
vocab::debug(dbuf, in[j]).c_str()
|
||||
);
|
||||
|
||||
log::logf
|
||||
{
|
||||
log, log::level::DEBUG,
|
||||
"%s",
|
||||
string_view{report, report_size}
|
||||
};
|
||||
#endif
|
||||
}
|
||||
ctrl.tokens.count
|
||||
};
|
||||
|
||||
uint64_t cycles(0);
|
||||
if(ctrl.prop)
|
||||
|
@ -170,7 +132,7 @@ ircd::gpt::generate(const vector_view<u16> &out,
|
|||
cycles
|
||||
};
|
||||
|
||||
backprop(task, ctrl.loss_mean, *model::default_model, momentum);
|
||||
backprop(task, ctrl.loss.mean, *model::default_model, momentum);
|
||||
}
|
||||
|
||||
if(ctrl.prop)
|
||||
|
@ -178,17 +140,17 @@ ircd::gpt::generate(const vector_view<u16> &out,
|
|||
log::debug
|
||||
{
|
||||
log, "Backpropagation of %2.6f in %lu cycles.",
|
||||
ctrl.loss_mean,
|
||||
ctrl.loss.mean,
|
||||
cycles,
|
||||
};
|
||||
|
||||
ctrl.epoch = 0;
|
||||
ctrl.loss_mean = 0;
|
||||
ctrl.loss = ctrl.loss_mean;
|
||||
ctrl.perp_mean = 0;
|
||||
ctrl.perp = ctrl.perp_mean;
|
||||
ctrl.cert_mean = 0;
|
||||
ctrl.cert = ctrl.cert_mean;
|
||||
ctrl.epic.epoch = 0;
|
||||
ctrl.loss.mean = 0;
|
||||
ctrl.loss.last = ctrl.loss.mean;
|
||||
ctrl.perp.mean = 0;
|
||||
ctrl.perp.last = ctrl.perp.mean;
|
||||
ctrl.cert.mean = 0;
|
||||
ctrl.cert.last = ctrl.cert.mean;
|
||||
ctrl.prop = false;
|
||||
pipe::default_model->invalid = true;
|
||||
return {};
|
||||
|
@ -206,73 +168,49 @@ ircd::gpt::generate(const vector_view<u16> &out,
|
|||
generate(task);
|
||||
}
|
||||
last_time = stopwatch.at<milliseconds>();
|
||||
ctrl.elapsed += last_time.count();
|
||||
ctrl.epic.elapsed += last_time.count();
|
||||
|
||||
/*
|
||||
coil(data(scratch), tokens, *opts.model);
|
||||
tail(logit, data(last_embed), *opts.model);
|
||||
out[i] = argmax(logit, *opts);
|
||||
*/
|
||||
|
||||
uint accc_thresh[3] {3, 3, 3};
|
||||
for(uint i(0); i < 3; ++i)
|
||||
for(uint j(3); j > 0; --j)
|
||||
if(opts.accept_code[i][j - 1] == -1U)
|
||||
--accc_thresh[i];
|
||||
else
|
||||
break;
|
||||
|
||||
uint errc_thresh[3] {3, 3, 3};
|
||||
for(uint i(0); i < 3; ++i)
|
||||
for(uint j(3); j > 0; --j)
|
||||
if(opts.error_code[i][j - 1] == -1U)
|
||||
--errc_thresh[i];
|
||||
else
|
||||
break;
|
||||
|
||||
for(auto &j(ret); j + in.size() < ctrl.tokens && j < out.size() && !halt; ++j)
|
||||
for(uint j(0); j < ctrl.tokens.count && ret < out.size() && !halt; ++j)
|
||||
{
|
||||
out[j] = ctrl.token[(in.size() + j + ctrl.head) % opts.buffer_tokens];
|
||||
const auto tok
|
||||
{
|
||||
ctrl.token[j]
|
||||
};
|
||||
|
||||
for(uint j(0); j < 3; ++j)
|
||||
errc[j] = opts.error_code[j][errc[j]] == out[j]?
|
||||
errc[j] + 1: 0;
|
||||
if(j >= in_size)
|
||||
out[ret++] = tok;
|
||||
|
||||
for(uint j(0); j < 3; ++j)
|
||||
accc[j] = opts.accept_code[j][accc[j]] == out[j]?
|
||||
accc[j] + 1: 0;
|
||||
|
||||
for(uint j(0); j < 3; ++j)
|
||||
halt |= accc_thresh[j] && accc[j] >= accc_thresh[j],
|
||||
halt |= errc_thresh[j] && errc[j] >= errc_thresh[j];
|
||||
if(j < in_size)
|
||||
continue;
|
||||
|
||||
static char dbuf[512] {0};
|
||||
char report[1536] {0};
|
||||
char tmbuf[4][64] {0};
|
||||
const size_t bsz(ctrl.tokens - in.size());
|
||||
const size_t bsz(ctrl.tokens.count - in_size);
|
||||
const size_t report_size = snprintf
|
||||
(
|
||||
report, sizeof(report),
|
||||
"%4lu:%-4u %4lu:%-4lu %6.1f%% %5.1fP %6.3fL [%c%c%c] %5u %6.3fL %6.2fP %5.1f%% %s %04x %8s %8s | %8s",
|
||||
j + in.size(),
|
||||
ctrl.tokens,
|
||||
ctrl.epoch,
|
||||
ctrl.cycle,
|
||||
std::clamp(ctrl.cert_mean * 100.0f, 0.0f, 100.0f),
|
||||
std::clamp(ctrl.perp_mean, 0.0f, 100.0f),
|
||||
std::clamp(ctrl.loss_mean, 0.0f, 99.99f),
|
||||
opts.label == out[j]? '+': ' ',
|
||||
accc[0] + accc[1] + accc[2] >= 3? 'A': ' ',
|
||||
errc[0] + errc[1] + errc[2] >= 3? 'E': ' ',
|
||||
"%-3u %4u:%-4u %4lu:%-4lu %6.1f%% %5.1fP %6.3fL [%c%c%c] %5u %6.3fL %6.2fP %5.1f%% %s %04x %8s %8s | %8s",
|
||||
j,
|
||||
ret - 1,
|
||||
ctrl.tokens.count,
|
||||
ctrl.epic.epoch,
|
||||
ctrl.epic.cycle,
|
||||
std::clamp(ctrl.cert.mean * 100.0f, 0.0f, 100.0f),
|
||||
std::clamp(ctrl.perp.mean, 0.0f, 100.0f),
|
||||
std::clamp(ctrl.loss.mean, 0.0f, 99.99f),
|
||||
opts.label == tok? '+': ' ',
|
||||
' ', // flag place
|
||||
' ', // flag place
|
||||
opts.label,
|
||||
std::clamp(ctrl.loss, 0.0f, 99.99f),
|
||||
std::clamp(ctrl.perp, 0.0f, 100.0f),
|
||||
std::clamp(ctrl.cert * 100.0f, 0.0f, 100.0f),
|
||||
vocab::debug(dbuf, out[j]).c_str(),
|
||||
out[j],
|
||||
std::clamp(ctrl.loss.last, 0.0f, 99.99f),
|
||||
std::clamp(ctrl.perp.last, 0.0f, 100.0f),
|
||||
std::clamp(ctrl.cert.last * 100.0f, 0.0f, 100.0f),
|
||||
vocab::debug(dbuf, tok).c_str(),
|
||||
tok,
|
||||
pretty(tmbuf[0], milliseconds(last_time / bsz), 1).c_str(),
|
||||
pretty(tmbuf[1], si(cycles / bsz), 1).c_str(),
|
||||
pretty(tmbuf[2], milliseconds(ctrl.elapsed), 1).c_str()
|
||||
pretty(tmbuf[2], milliseconds(ctrl.epic.elapsed), 1).c_str()
|
||||
);
|
||||
|
||||
log::logf
|
||||
|
@ -283,19 +221,6 @@ ircd::gpt::generate(const vector_view<u16> &out,
|
|||
};
|
||||
}
|
||||
|
||||
ret = ctrl.tokens - in.size();
|
||||
if ((false)) for(uint i(0); i < 3; ++i)
|
||||
if(accc_thresh[i] && ctrl.accept_seq[i] >= accc_thresh[i])
|
||||
{
|
||||
ret -= (3 - accc_thresh[i]);
|
||||
break;
|
||||
}
|
||||
else if(errc_thresh[i] && ctrl.error_seq[i] >= errc_thresh[i])
|
||||
{
|
||||
ret -= (3 - errc_thresh[i]);
|
||||
break;
|
||||
}
|
||||
|
||||
ctx::interruption_point();
|
||||
return vector_view<u16>
|
||||
{
|
||||
|
@ -689,6 +614,7 @@ ircd::gpt::gelu(f32x4 &out,
|
|||
// backside
|
||||
//
|
||||
|
||||
[[gnu::noinline]]
|
||||
size_t
|
||||
ircd::gpt::backprop(task &task,
|
||||
const f32 grad,
|
||||
|
@ -792,6 +718,7 @@ ircd::gpt::backprop(task &task,
|
|||
return off;
|
||||
}
|
||||
|
||||
[[gnu::noinline]]
|
||||
size_t
|
||||
ircd::gpt::adamw(task &task,
|
||||
const f32 grad,
|
||||
|
@ -820,7 +747,7 @@ ircd::gpt::adamw(task &task,
|
|||
};
|
||||
|
||||
for(uint i(0); i < num / 4; ++i)
|
||||
off = adamw(p[0][i], p[1][i], p[2][i], grad, opts.alpha, opts.beta[0], opts.beta[1], ctrl.step, off);
|
||||
off = adamw(p[0][i], p[1][i], p[2][i], grad, opts.alpha, opts.beta[0], opts.beta[1], ctrl.epic.step, off);
|
||||
|
||||
return off;
|
||||
}
|
||||
|
@ -915,19 +842,111 @@ noexcept
|
|||
}
|
||||
|
||||
//
|
||||
// hypercall
|
||||
// gpt::opts
|
||||
//
|
||||
|
||||
ircd::string_view
|
||||
ircd::gpt::reflect(const enum ircd_gpt_hypercall code)
|
||||
ircd_gpt_opts::ircd_gpt_opts(const ircd::gpt::model::decoder *const model)
|
||||
noexcept
|
||||
:model
|
||||
{
|
||||
model
|
||||
}
|
||||
,limit
|
||||
{
|
||||
-1U
|
||||
}
|
||||
,top_k
|
||||
{
|
||||
2U
|
||||
}
|
||||
,context_tokens
|
||||
{
|
||||
1024U
|
||||
}
|
||||
,buffer_tokens
|
||||
{
|
||||
1024U
|
||||
}
|
||||
,embed_elems
|
||||
{
|
||||
768U
|
||||
}
|
||||
,attn_mult
|
||||
{
|
||||
3U
|
||||
}
|
||||
,ffnn_mult
|
||||
{
|
||||
4U
|
||||
}
|
||||
,attn_elems
|
||||
{
|
||||
embed_elems * attn_mult
|
||||
}
|
||||
,ffnn_elems
|
||||
{
|
||||
embed_elems * ffnn_mult
|
||||
}
|
||||
,lanes
|
||||
{
|
||||
4U
|
||||
}
|
||||
,embed_width
|
||||
{
|
||||
embed_elems / lanes
|
||||
}
|
||||
,attn_width
|
||||
{
|
||||
attn_elems / lanes
|
||||
}
|
||||
,attn_height
|
||||
{
|
||||
embed_elems / lanes
|
||||
}
|
||||
,ffnn_width
|
||||
{
|
||||
ffnn_elems / lanes
|
||||
}
|
||||
,ffnn_height
|
||||
{
|
||||
embed_elems / lanes
|
||||
}
|
||||
,logits
|
||||
{
|
||||
50257
|
||||
}
|
||||
,seed
|
||||
{
|
||||
1234567890UL
|
||||
}
|
||||
,training_steps
|
||||
{
|
||||
250000
|
||||
}
|
||||
,validation_steps
|
||||
{
|
||||
5000
|
||||
}
|
||||
,label
|
||||
{
|
||||
198
|
||||
}
|
||||
,alpha
|
||||
{
|
||||
0.001f
|
||||
}
|
||||
,beta
|
||||
{
|
||||
0.9f,
|
||||
0.999f,
|
||||
}
|
||||
,epsilon
|
||||
{
|
||||
0.000001
|
||||
}
|
||||
,gates
|
||||
{
|
||||
0
|
||||
}
|
||||
{
|
||||
switch(code)
|
||||
{
|
||||
case IRCD_GPT_ACCEPT: return "ACCEPT";
|
||||
case IRCD_GPT_ECOMPLETE: return "ECOMPLETE";
|
||||
case IRCD_GPT_ETOKENS: return "ETOKENS";
|
||||
}
|
||||
|
||||
return "??????";
|
||||
}
|
||||
|
|
|
@ -432,7 +432,7 @@ _ircd_gpt_lm_embed(__global const struct ircd_gpt_task *const ctrl,
|
|||
const uint word_idx)
|
||||
{
|
||||
const ushort
|
||||
ring_idx = (ctrl->head + tok_idx) % opts->buffer_tokens,
|
||||
ring_idx = (ctrl->tokens.head + tok_idx) % opts->buffer_tokens,
|
||||
token = ctrl->token[ring_idx];
|
||||
|
||||
const float4
|
||||
|
@ -454,7 +454,7 @@ ircd_gpt_lm_embed(__global const struct ircd_gpt_task *const ctrl,
|
|||
wi = get_group_id(0),
|
||||
wn = get_num_groups(0);
|
||||
|
||||
for(uint i = 0; i < ctrl->tokens; ++i)
|
||||
for(uint i = 0; i < ctrl->tokens.count; ++i)
|
||||
if(i % wn == wi)
|
||||
_ircd_gpt_lm_embed(ctrl, opts, accum, pos, vocab, i, i, li);
|
||||
}
|
||||
|
@ -492,7 +492,7 @@ ircd_gpt_lm_logit(__global const struct ircd_gpt_task *const ctrl,
|
|||
{
|
||||
const uint
|
||||
gi = get_global_id(0),
|
||||
ti = ctrl->tokens - 1,
|
||||
ti = ctrl->tokens.count - 1,
|
||||
words = opts->embed_width;
|
||||
|
||||
float4 acc = 0.0f;
|
||||
|
@ -596,31 +596,16 @@ ircd_gpt_leave(__global struct ircd_gpt_task *const ctrl,
|
|||
__constant const struct ircd_gpt_opts *const opts,
|
||||
const uint li)
|
||||
{
|
||||
// If the call value has been set to something other than default we
|
||||
// do nothing else here.
|
||||
if(ctrl->call != IRCD_GPT_ECOMPLETE)
|
||||
return;
|
||||
|
||||
// No action for other threads right now
|
||||
if(li != 0)
|
||||
return;
|
||||
|
||||
// Run debug checks and assertions.
|
||||
#ifdef RB_DEBUG
|
||||
if(ctrl->call == IRCD_GPT_ECOMPLETE)
|
||||
if(ctrl->tokens < 2)
|
||||
ctrl->call = IRCD_GPT_ETOKENS;
|
||||
#endif
|
||||
|
||||
// On the last cycle, with no prior call or error code set, indicate
|
||||
// a nominal exit condition.
|
||||
if(ctrl->cycle + 1 >= opts->limit)
|
||||
{
|
||||
ctrl->call = IRCD_GPT_ACCEPT;
|
||||
ctrl->epoch += 1;
|
||||
}
|
||||
if(ctrl->epic.cycle + 1 >= opts->limit)
|
||||
ctrl->epic.epoch += 1;
|
||||
|
||||
ctrl->cycle += 1;
|
||||
ctrl->epic.cycle += 1;
|
||||
ctrl->magic = 0xC7012C70U;
|
||||
}
|
||||
|
||||
|
@ -634,10 +619,6 @@ ircd_gpt_lm_result(__global struct ircd_gpt_task *const ctrl,
|
|||
__global const float *const restrict logexp,
|
||||
__global const float *const restrict logit)
|
||||
{
|
||||
// When the hypercall code is already set, bail here.
|
||||
if(ctrl->call != IRCD_GPT_ECOMPLETE)
|
||||
return;
|
||||
|
||||
// To read from cells other than idx[0] we need this barrier.
|
||||
if(opts->top_k > 1)
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
@ -647,7 +628,7 @@ ircd_gpt_lm_result(__global struct ircd_gpt_task *const ctrl,
|
|||
return;
|
||||
|
||||
const bool
|
||||
buffer_full = ctrl->tokens >= opts->buffer_tokens;
|
||||
buffer_full = ctrl->tokens.count >= opts->buffer_tokens;
|
||||
|
||||
const ulong
|
||||
rnd = opts->top_k > 1?
|
||||
|
@ -657,20 +638,20 @@ ircd_gpt_lm_result(__global struct ircd_gpt_task *const ctrl,
|
|||
entro = max(opts->top_k, 1U),
|
||||
select = rnd % entro,
|
||||
token = idx[select],
|
||||
dest = (ctrl->head + ctrl->tokens) % opts->buffer_tokens,
|
||||
tokens = min(ctrl->tokens + 1, opts->buffer_tokens),
|
||||
dest = (ctrl->tokens.head + ctrl->tokens.count) % opts->buffer_tokens,
|
||||
tokens = min(ctrl->tokens.count + 1, opts->buffer_tokens),
|
||||
head = buffer_full?
|
||||
(ctrl->head + 1) % opts->buffer_tokens: ctrl->head;
|
||||
(ctrl->tokens.head + 1) % opts->buffer_tokens: ctrl->tokens.head;
|
||||
|
||||
ctrl->head = head;
|
||||
ctrl->tokens = tokens;
|
||||
ctrl->tokens.head = head;
|
||||
ctrl->tokens.count = tokens;
|
||||
ctrl->token[dest] = token;
|
||||
|
||||
const ushort
|
||||
ln = get_local_size(0),
|
||||
next_select = (select + 1) % ln,
|
||||
next_token = idx[next_select],
|
||||
sum_sel = ctrl->epoch % 3;
|
||||
sum_sel = ctrl->epic.epoch % 3;
|
||||
|
||||
const float
|
||||
test_lsm = logexp[opts->label],
|
||||
|
@ -737,7 +718,7 @@ ircd_gpt_prop_elem(__global const struct ircd_gpt_task *const ctrl,
|
|||
{
|
||||
const uint
|
||||
li = get_local_id(0),
|
||||
step = ctrl->step;
|
||||
step = ctrl->epic.step;
|
||||
|
||||
const float4
|
||||
param = param_[li],
|
||||
|
|
|
@ -129,12 +129,11 @@ ircd::gpt::generate(task &task)
|
|||
*task.ctrl
|
||||
};
|
||||
|
||||
ctrl.cycle = 0;
|
||||
ctrl.call = IRCD_GPT_ECOMPLETE;
|
||||
ctrl.host_tsc = prof::cycles();
|
||||
volatile const size_t tokens(ctrl.tokens);
|
||||
volatile const auto epoch(ctrl.epoch);
|
||||
volatile size_t cycle(ctrl.cycle);
|
||||
ctrl.epic.cycle = 0;
|
||||
ctrl.epic.host_tsc = prof::cycles();
|
||||
volatile const size_t tokens(ctrl.tokens.count);
|
||||
volatile const auto epoch(ctrl.epic.epoch);
|
||||
volatile size_t cycle(ctrl.epic.cycle);
|
||||
|
||||
std::deque<pipe::exec> list;
|
||||
for(; cycle < opts.limit; ++cycle)
|
||||
|
@ -151,8 +150,7 @@ ircd::gpt::generate(task &task)
|
|||
task, tokens + cycle, rel, acq
|
||||
);
|
||||
|
||||
// Conditions for a cl::flush here; this is not default but
|
||||
// may be configured to improve some workloads.
|
||||
// Conditions for a cl::flush here
|
||||
const bool flush
|
||||
{
|
||||
// Flushing here is enabled by the configuration
|
||||
|
@ -194,18 +192,8 @@ ircd::gpt::generate(task &task)
|
|||
list.clear();
|
||||
|
||||
assert(ctrl.magic == 0xC7012C70);
|
||||
assert(ctrl.epic.cycle == cycle || ctx::interruption_requested());
|
||||
this_ctx::interruption_point();
|
||||
|
||||
// Interp error codes
|
||||
if(unlikely(ctrl.call <= 0))
|
||||
throw error
|
||||
{
|
||||
"hyper (#%d) :%s",
|
||||
abs(int(ctrl.call)),
|
||||
reflect(ctrl.call),
|
||||
};
|
||||
|
||||
assert(ctrl.cycle == cycle || ctx::interruption_requested());
|
||||
}
|
||||
|
||||
void
|
||||
|
|
Loading…
Reference in a new issue