construct/ircd/gpt.cc

// Matrix Construct Is All You Need Is All You Need Is AllĊĊĊĊĊĊĊĊ
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.

namespace ircd::gpt
{
	// Backpropagation step; not defined in the visible part of this file.
	// generate(task &) calls it with the task, the mean loss of label[0], the
	// default model and a pair of momentum buffers, leaving the final
	// argument at its default of zero.
	size_t
	backprop(task &,
	         const f32 loss,
	         model::decoder &,
	         f32 *const (&momentum)[2],
	         size_t = 0);

	// Formats and logs one report line for the token at index `i`; `in_size`
	// is the number of prompt tokens.
	void
	generate_debug(task &,
	               const uint &i,
	               const uint &in_size);
}

// Log facility used by this unit's log::debug / log::logf calls; named "gpt".
decltype(ircd::gpt::log) ircd::gpt::log{"gpt"};

// Text-in, text-out convenience overload: tokenizes `in`, runs the
// token-level generate() overload and detokenizes its result into `out`.
//
// out   buffer handed to vocab::detokenize() for the resulting text
// in    prompt text handed to vocab::tokenize()
// task  passed through unchanged to the token-level overload
//
// Returns the result of vocab::detokenize() over the generated tokens.
ircd::string_view
ircd::gpt::generate(const mutable_buffer &out,
                    const string_view &in,
                    task &task)
{
	// Stack scratch space: one array for the prompt's tokens and one for the
	// generated tokens, 1024 entries each.
	u16 prompt_buf[1024];
	u16 result_buf[1024];

	const auto prompt_tokens(vocab::tokenize(prompt_buf, in));
	const auto result_tokens(generate(result_buf, prompt_tokens, task));
	return vocab::detokenize(out, result_tokens);
}

// Token-level generation: loads the prompt tokens into the task's control
// block, runs generate(task &), then copies tokens out of the control block.
//
// out   destination for output tokens; at most out.size() are written
// in    prompt tokens; only as many as fit within opts.buffer_tokens are used
// task  must have non-null `ctrl` and `opts` (asserted)
//
// Returns a view over the leading part of `out` that was written.
ircd::vector_view<ircd::u16>
ircd::gpt::generate(const vector_view<u16> &out,
                    const vector_view<const u16> &in,
                    task &task)
{
	assert(task.ctrl);
	assert(task.opts);

	const auto &opts(*task.opts);
	auto &ctrl(*task.ctrl);

	// Reset the token buffer state, then load as much of the prompt as fits.
	ctrl.tokens.count = 0;
	ctrl.tokens.head = 0;
	for(uint k(0); k < in.size() && ctrl.tokens.count < opts.buffer_tokens; ++k)
		ctrl.token[ctrl.tokens.count++] = in[k];

	// Number of prompt tokens actually loaded.
	const size_t in_size
	{
		ctrl.tokens.count
	};

	generate(task);

	uint ret(0);
	for(uint i(0); i < ctrl.tokens.count && ret < out.size(); ++i)
	{
		// Position of logical element i within the token buffer.
		const auto pos
		{
			(i + ctrl.tokens.head) % opts.buffer_tokens
		};

		const auto tok
		{
			ctrl.token[pos]
		};

		// Only positions at or beyond the prompt length are output.
		if(pos >= in_size)
			out[ret++] = tok;

		// Debug bit 0x01 enables the per-token report.
		if(likely(~opts.debug & 0x01))
			continue;

		// Unless debug bit 0x02 is set, prompt positions are not reported.
		if(likely(~opts.debug & 0x02))
			if(pos < in_size)
				continue;

		// generate_debug() offsets by tokens.head and wraps on its own, so it
		// must be given the logical index; handing it `pos` would apply the
		// head offset twice whenever head is non-zero.
		generate_debug(task, i, in_size);
	}

	ctx::interruption_point();
	return vector_view<u16>
	{
		out, ret
	};
}

// Runs one unit of work for the task: a backpropagation step when ctrl.prop
// is set, otherwise one pass of pipe::generate().
//
// On the backpropagation path the loss/perplexity registers of label[0] and
// the epoch counter are cleared, ctrl.prop is reset and the pipe's default
// model is flagged invalid; no generation happens in that call. On the
// generation path the elapsed milliseconds are added to ctrl.epic.elapsed.
void
ircd::gpt::generate(task &task)
{
	auto &ctrl(*task.ctrl);

	uint64_t cycles(0);
	if(ctrl.prop)
	{
		// Pair of momentum buffers allocated on first use and never freed
		// here. Each holds sizeof(model::decoder) / 4 elements, all zero
		// initially (presumably one f32 per model parameter -- confirm).
		static f32 *_momentum[2];
		if(!_momentum[0])
		{
			_momentum[0] = new f32[sizeof(model::decoder) / 4] {0.0f};
			_momentum[1] = new f32[sizeof(model::decoder) / 4] {0.0f};
		}

		f32 *const momentum[2]
		{
			_momentum[0], _momentum[1],
		};

		// Bound to `cycles`, which is read by the log line below after this
		// scope has closed.
		const prof::scope_cycles task_cycles
		{
			cycles
		};

		backprop(task, ctrl.label[0].loss.mean, *model::default_model, momentum);
	}

	// The flag is deliberately read again after backprop() has run.
	if(ctrl.prop)
	{
		log::debug
		{
			log, "Backpropagation of %2.6f in %lu cycles.",
			ctrl.label[0].loss.mean,
			cycles,
		};

		// Clear the running statistics for the next round.
		ctrl.epic.epoch = 0;
		ctrl.label[0].loss.mean = 0;
		ctrl.label[0].loss.last = ctrl.label[0].loss.mean;
		ctrl.label[0].perp.mean = 0;
		ctrl.label[0].perp.last = ctrl.label[0].perp.mean;
		ctrl.prop = false;

		// NOTE(review): presumably forces the pipe to pick up the updated
		// weights on its next use -- confirm against pipe::model.
		pipe::default_model->invalid = true;
		return;
	}

	cycles = 0;
	util::timer stopwatch;
	{
		const prof::scope_cycles task_cycles
		{
			cycles
		};

		pipe::generate(task);
	}

	const milliseconds last_time
	{
		stopwatch.at<milliseconds>()
	};

	ctrl.epic.elapsed += last_time.count();
}

// Formats one report line for a token in the task's token buffer and emits
// it to the gpt log at DEBUG level.
//
// task     task whose opts and ctrl blocks are read
// i        logical index; offset by ctrl.tokens.head and wrapped at
//          opts.buffer_tokens to locate the token
// in_size  number of prompt tokens; subtracted from ctrl.tokens.count to
//          obtain the divisor for the per-token columns
//
// The function formats into static buffers, so their contents are
// overwritten by every call.
void
ircd::gpt::generate_debug(task &task,
                          const uint &i,
                          const uint &in_size)
{
	const auto &opts(*task.opts);
	auto &ctrl(*task.ctrl);

	const auto j
	{
		(i + ctrl.tokens.head) % opts.buffer_tokens
	};

	const auto tok
	{
		ctrl.token[j]
	};

	static char dbuf[512];
	static char report[1536];
	static char tmbuf[4][64];

	// Count of tokens beyond the prompt. It is used as a divisor below, and
	// it is zero when nothing has been generated yet, so substitute one to
	// keep the integer divisions defined.
	const size_t generated(ctrl.tokens.count - in_size);
	const size_t bsz(generated? generated: 1);

	const int printed = snprintf
	(
		report, sizeof(report),
		"%-3u %-4u %4lu:%-4lu %6.1f%% %5.1fP %6.3fL [%c%c%c] %5u %6.3fL %6.2fP  %5.1f%% %s %04x  %8s %8s | %8s",
		j,
		ctrl.tokens.count,
		ctrl.epic.epoch,
		ctrl.epic.cycle,
		0.0f, // cert
		std::clamp(ctrl.label[0].perp.mean, 0.0f, 100.0f),
		std::clamp(ctrl.label[0].loss.mean, 0.0f, 99.99f),
		ctrl.label[0].token == tok? '+': ' ',
		' ', // flag place
		' ', // flag place
		ctrl.label[0].token,
		std::clamp(ctrl.label[0].loss.last, 0.0f, 99.99f),
		std::clamp(ctrl.label[0].perp.last, 0.0f, 100.0f),
		0.0f, // cert
		vocab::debug(dbuf, tok).c_str(),
		tok,
		pretty(tmbuf[0], milliseconds(0ms / bsz), 1).c_str(),
		pretty(tmbuf[1], si(0UL / bsz), 1).c_str(),
		pretty(tmbuf[2], milliseconds(ctrl.epic.elapsed), 1).c_str()
	);

	// snprintf() returns the length the full output would have had, or a
	// negative value on error; bound it to what is actually in the buffer.
	const size_t report_size
	{
		printed < 0?
			size_t(0):
			std::min(size_t(printed), sizeof(report) - 1)
	};

	log::logf
	{
		log, log::level::DEBUG,
		"%s",
		string_view{report, report_size}
	};
}

//
// gpt::task
//

// Binds the task to caller-provided option and control blocks, allocates
// opts->frames control frames, zeroes the control block and seeds the task
// from opts->seed.
//
// opts  options block; dereferenced here, so it must not be null
// ctrl  control block; its first sizeof(gpt::ctrl) bytes are zeroed
//
// NOTE(review): `frame` receives an array from new[] and the destructor does
// not release it explicitly; this is only leak-free if the member is an
// owning type -- confirm against the class declaration.
ircd::gpt::task::task(const gpt::opts *const opts,
                      gpt::ctrl *const ctrl)
:opts(opts)
,ctrl(ctrl)
,frame(new gpt::ctrl[opts->frames])
{
	memset(ctrl, 0x0, sizeof(gpt::ctrl));
	seed(*this, this->opts->seed);
}

// Out-of-line destructor with no statements of its own; members are
// destroyed by their own destructors.
ircd::gpt::task::~task() noexcept = default;

//
// gpt::opts
//

// Populates an options block with its default values.
//
// model  decoder to reference; when null, ircd::gpt::model::default_model is
//        used instead.
//
// Several members below are computed from other members of this object.
// Members are initialized in declaration order, not in the order written
// here -- NOTE(review): confirm each source member is declared before the
// members derived from it.
ircd_gpt_opts::ircd_gpt_opts(const ircd::gpt::model::decoder *const model)
noexcept
:model{model? model: ircd::gpt::model::default_model}

// Task defaults. Bits 0x01 and 0x02 of `debug` are tested by generate().
,seed{1234567890UL}
,limit{-1U}
,top_k{2U}
,top_p{90U}
,top_n{16}
,labels{0}
,debug{0x01}

// Buffer and model dimensions.
,context_tokens{1024U}
,buffer_tokens{1024U}
,embed_elems{768U}
,attn_rank{12U}
,attn_mult{3U}
,ffnn_mult{4U}
,attn_elems{embed_elems * attn_mult}
,ffnn_elems{embed_elems * ffnn_mult}
,lanes{4U}
,layers{12}

// Element counts divided by the lane count.
,embed_width{embed_elems / lanes}
,attn_width{attn_elems / lanes}
,attn_height{embed_elems / lanes}
,ffnn_width{ffnn_elems / lanes}
,ffnn_height{embed_elems / lanes}
,logits{50257}

// Step counts.
,training_steps{250000}
,validation_steps{5000}
,testing_steps{5000}

// Optimizer constants.
,alpha{0.001f}
,beta{0.9f, 0.999f}
,epsilon{0.000001}
{
}
ircd::gpt: More Matrix Than Matrix. 2021-03-05 02:03:33 +01:00			`// Matrix Construct Is All You Need Is All You Need Is AllĊĊĊĊĊĊĊĊ`
			`//`
			`// Copyright (C) Matrix Construct Developers, Authors & Contributors`
			`// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>`
			`//`
			`// Permission to use, copy, modify, and/or distribute this software for any`
			`// purpose with or without fee is hereby granted, provided that the above`
			`// copyright notice and this permission notice is present in all copies. The`
			`// full license for this software is available in the LICENSE file.`

			`namespace ircd::gpt`
			`{`
ircd::gpt: Splits and renames; various reorg. 2021-09-02 19:40:11 +02:00			`size_t backprop(task &, const f32, model::decoder &, f32 *const (&)[2], size_t = 0);`
ircd::gpt: Backpropagate adaptive moment estimations. 2021-04-22 21:20:58 +02:00
ircd::gpt: Splits and renames; various reorg. 2021-09-02 19:40:11 +02:00			`void generate_debug(task &, const uint &, const uint &);`
ircd::gpt: More Matrix Than Matrix. 2021-03-05 02:03:33 +01:00			`}`

ircd::gpt: Add basic interface; add options, context. 2021-03-09 11:08:47 +01:00			`decltype(ircd::gpt::log)`
			`ircd::gpt::log`
			`{`
			`"gpt"`
			`};`

			`ircd::string_view`
			`ircd::gpt::generate(const mutable_buffer &out,`
			`const string_view &in,`
ircd::gpt: Reorganize interface. 2021-04-02 22:01:38 +02:00			`task &task)`
ircd::gpt: More Matrix Than Matrix. 2021-03-05 02:03:33 +01:00			`{`
ircd::gpt: Reorganize interface. 2021-04-02 22:01:38 +02:00			`u16 buf[2][1024];`
ircd::gpt: Add basic interface; add options, context. 2021-03-09 11:08:47 +01:00			`const auto input_tokens`
			`{`
			`vocab::tokenize(buf[0], in)`
			`};`
ircd::gpt: More Matrix Than Matrix. 2021-03-05 02:03:33 +01:00
ircd::gpt: Add basic interface; add options, context. 2021-03-09 11:08:47 +01:00			`const auto output_tokens`
ircd::gpt: More Matrix Than Matrix. 2021-03-05 02:03:33 +01:00			`{`
ircd::gpt: Reorganize interface. 2021-04-02 22:01:38 +02:00			`generate(buf[1], input_tokens, task)`
ircd::gpt: Add basic interface; add options, context. 2021-03-09 11:08:47 +01:00			`};`

			`const auto output`
			`{`
			`vocab::detokenize(out, output_tokens)`
			`};`

			`return output;`
			`}`

			`ircd::vector_view<ircd::u16>`
			`ircd::gpt::generate(const vector_view<u16> &out,`
			`const vector_view<const u16> &in,`
ircd::gpt: Reorganize interface. 2021-04-02 22:01:38 +02:00			`task &task)`
ircd::gpt: Add basic interface; add options, context. 2021-03-09 11:08:47 +01:00			`{`
ircd::gpt: Reorganize interface. 2021-04-02 22:01:38 +02:00			`assert(task.ctrl);`
			`assert(task.opts);`
ircd::gpt: Abstractor various backend subroutines; generator loop. 2021-03-30 03:18:59 +02:00
			`uint ret(0);`
ircd::gpt: Add basic interface; add options, context. 2021-03-09 11:08:47 +01:00			`bool halt(false);`
ircd::gpt: Reorganize interface. 2021-04-02 22:01:38 +02:00
			`const auto &opts(*task.opts);`
			`auto &ctrl(*task.ctrl);`
ircd::gpt: Reorganize task options and control blocks. 2021-05-03 05:40:00 +02:00			`ctrl.tokens.count = 0;`
			`ctrl.tokens.head = 0;`
ircd::gpt: Abstractor various backend subroutines; generator loop. 2021-03-30 03:18:59 +02:00
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`uint j(0);`
			`while(j < in.size() && ctrl.tokens.count < opts.buffer_tokens)`
			`ctrl.token[ctrl.tokens.count++] = in[j++];`

			`const size_t in_size`
			`{`
			`ctrl.tokens.count`
			`};`

			`generate(task);`

			`for(uint i(0); i < ctrl.tokens.count && ret < out.size() && !halt; ++i)`
			`{`
			`const auto j`
			`{`
			`(i + ctrl.tokens.head) % opts.buffer_tokens`
			`};`

			`const auto tok`
			`{`
			`ctrl.token[j]`
			`};`

			`if(j >= in_size)`
			`out[ret++] = tok;`

			`if(likely(~opts.debug & 0x01))`
			`continue;`

			`if(likely(~opts.debug & 0x02))`
			`if(j < in_size)`
			`continue;`

			`generate_debug(task, j, in_size);`
			`}`

			`ctx::interruption_point();`
			`return vector_view<u16>`
			`{`
			`out, ret`
			`};`
			`}`

			`void`
			`ircd::gpt::generate(task &task)`
			`{`
			`const auto &opts(*task.opts);`
			`auto &ctrl(*task.ctrl);`
ircd::gpt: Reorganize interface. 2021-04-02 22:01:38 +02:00
ircd::gpt: Reorganize task options and control blocks. 2021-05-03 05:40:00 +02:00			`const size_t in_size`
			`{`
			`ctrl.tokens.count`
			`};`
ircd::gpt: More Matrix Than Matrix. 2021-03-05 02:03:33 +01:00
ircd::gpt: Reorganize interface. 2021-04-02 22:01:38 +02:00			`uint64_t cycles(0);`
ircd::gpt: Backpropagate adaptive moment estimations. 2021-04-22 21:20:58 +02:00			`if(ctrl.prop)`
			`{`
			`static f32 *_momentum[2];`
			`if(!_momentum[0])`
			`{`
			`_momentum[0] = new f32[sizeof(model::decoder) / 4] {0.0f};`
			`_momentum[1] = new f32[sizeof(model::decoder) / 4] {0.0f};`
			`}`

			`f32 *const momentum[2]`
			`{`
			`_momentum[0], _momentum[1],`
			`};`

			`const prof::scope_cycles task_cycles`
			`{`
			`cycles`
			`};`

ircd::gpt: Add top N and target label result register control block. 2021-09-18 08:27:23 +02:00			`backprop(task, ctrl.label[0].loss.mean, *model::default_model, momentum);`
ircd::gpt: Backpropagate adaptive moment estimations. 2021-04-22 21:20:58 +02:00			`}`

			`if(ctrl.prop)`
			`{`
			`log::debug`
			`{`
			`log, "Backpropagation of %2.6f in %lu cycles.",`
ircd::gpt: Add top N and target label result register control block. 2021-09-18 08:27:23 +02:00			`ctrl.label[0].loss.mean,`
ircd::gpt: Backpropagate adaptive moment estimations. 2021-04-22 21:20:58 +02:00			`cycles,`
			`};`

ircd::gpt: Reorganize task options and control blocks. 2021-05-03 05:40:00 +02:00			`ctrl.epic.epoch = 0;`
ircd::gpt: Add top N and target label result register control block. 2021-09-18 08:27:23 +02:00			`ctrl.label[0].loss.mean = 0;`
			`ctrl.label[0].loss.last = ctrl.label[0].loss.mean;`
			`ctrl.label[0].perp.mean = 0;`
			`ctrl.label[0].perp.last = ctrl.label[0].perp.mean;`
ircd::gpt: Backpropagate adaptive moment estimations. 2021-04-22 21:20:58 +02:00			`ctrl.prop = false;`
			`pipe::default_model->invalid = true;`
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`return;`
ircd::gpt: Backpropagate adaptive moment estimations. 2021-04-22 21:20:58 +02:00			`}`

			`cycles = 0;`
ircd::gpt: Reorganize interface. 2021-04-02 22:01:38 +02:00			`util::timer stopwatch;`
			`{`
			`const prof::scope_cycles task_cycles`
ircd::gpt: Add basic interface; add options, context. 2021-03-09 11:08:47 +01:00			`{`
ircd::gpt: Reorganize interface. 2021-04-02 22:01:38 +02:00			`cycles`
			`};`
ircd::gpt: Add basic interface; add options, context. 2021-03-09 11:08:47 +01:00
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`pipe::generate(task);`
ircd::gpt: Reorganize interface. 2021-04-02 22:01:38 +02:00			`}`
ircd::gpt: Add basic interface; add options, context. 2021-03-09 11:08:47 +01:00
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`const milliseconds last_time`
ircd::gpt: Reorganize interface. 2021-04-02 22:01:38 +02:00			`{`
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`stopwatch.at<milliseconds>()`
			`};`
ircd::gpt: Abstractor various backend subroutines; generator loop. 2021-03-30 03:18:59 +02:00
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`ctrl.epic.elapsed += last_time.count();`
			`}`
ircd::gpt: Abstractor various backend subroutines; generator loop. 2021-03-30 03:18:59 +02:00
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`void`
			`ircd::gpt::generate_debug(task &task,`
			`const uint &i,`
			`const uint &in_size)`
			`{`
			`const auto &opts(*task.opts);`
			`auto &ctrl(*task.ctrl);`
ircd::gpt: Abstractor various backend subroutines; generator loop. 2021-03-30 03:18:59 +02:00
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`const auto j`
			`{`
			`(i + ctrl.tokens.head) % opts.buffer_tokens`
			`};`
ircd::gpt: More Matrix Than Matrix. 2021-03-05 02:03:33 +01:00
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`const auto tok`
ircd::gpt: More Matrix Than Matrix. 2021-03-05 02:03:33 +01:00			`{`
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`ctrl.token[j]`
			`};`

			`static char dbuf[512];`
			`static char report[1536];`
			`static char tmbuf[4][64];`
			`const size_t bsz(ctrl.tokens.count - in_size);`
			`const size_t report_size = snprintf`
			`(`
			`report, sizeof(report),`
			`"%-3u %-4u %4lu:%-4lu %6.1f%% %5.1fP %6.3fL [%c%c%c] %5u %6.3fL %6.2fP %5.1f%% %s %04x %8s %8s \| %8s",`
			`j,`
			`ctrl.tokens.count,`
			`ctrl.epic.epoch,`
			`ctrl.epic.cycle,`
ircd::gpt: Add top N and target label result register control block. 2021-09-18 08:27:23 +02:00			`0.0f, // cert`
			`std::clamp(ctrl.label[0].perp.mean, 0.0f, 100.0f),`
			`std::clamp(ctrl.label[0].loss.mean, 0.0f, 99.99f),`
			`ctrl.label[0].token == tok? '+': ' ',`
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`' ', // flag place`
			`' ', // flag place`
ircd::gpt: Add top N and target label result register control block. 2021-09-18 08:27:23 +02:00			`ctrl.label[0].token,`
			`std::clamp(ctrl.label[0].loss.last, 0.0f, 99.99f),`
			`std::clamp(ctrl.label[0].perp.last, 0.0f, 100.0f),`
			`0.0f, // cert`
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`vocab::debug(dbuf, tok).c_str(),`
			`tok,`
			`pretty(tmbuf[0], milliseconds(0ms / bsz), 1).c_str(),`
			`pretty(tmbuf[1], si(0UL / bsz), 1).c_str(),`
			`pretty(tmbuf[2], milliseconds(ctrl.epic.elapsed), 1).c_str()`
			`);`

			`log::logf`
			`{`
			`log, log::level::DEBUG,`
			`"%s",`
			`string_view{report, report_size}`
ircd::gpt: More Matrix Than Matrix. 2021-03-05 02:03:33 +01:00			`};`
			`}`

ircd::gpt: Add adaptive moment state to pipe model; move task prelim defs. 2021-04-17 20:53:50 +02:00			`//`
			`// gpt::task`
			`//`

			`ircd::gpt::task::task(const gpt::opts *const opts,`
ircd::gpt: Splits and renames; various reorg. 2021-09-02 19:40:11 +02:00			`gpt::ctrl *const ctrl)`
ircd::gpt: Add adaptive moment state to pipe model; move task prelim defs. 2021-04-17 20:53:50 +02:00			`:opts`
			`{`
			`opts`
			`}`
			`,ctrl`
			`{`
			`ctrl`
			`}`
ircd::gpt: Remove unnecessary alignas. 2021-12-20 18:06:06 +01:00			`,frame`
ircd::gpt: Add adaptive moment state to pipe model; move task prelim defs. 2021-04-17 20:53:50 +02:00			`{`
ircd::gpt: Remove unnecessary alignas. 2021-12-20 18:06:06 +01:00			`new gpt::ctrl[opts->frames]`
			`}`
			`{`
			`memset(ctrl, 0x0, sizeof(gpt::ctrl));`
			`seed(*this, this->opts->seed);`
ircd::gpt: Add adaptive moment state to pipe model; move task prelim defs. 2021-04-17 20:53:50 +02:00			`}`

			`ircd::gpt::task::~task()`
			`noexcept`
			`{`
			`}`

			`//`
ircd::gpt: Reorganize task options and control blocks. 2021-05-03 05:40:00 +02:00			`// gpt::opts`
ircd::gpt: Add adaptive moment state to pipe model; move task prelim defs. 2021-04-17 20:53:50 +02:00			`//`

ircd::gpt: Reorganize task options and control blocks. 2021-05-03 05:40:00 +02:00			`ircd_gpt_opts::ircd_gpt_opts(const ircd::gpt::model::decoder *const model)`
ircd::gpt: Add adaptive moment state to pipe model; move task prelim defs. 2021-04-17 20:53:50 +02:00			`noexcept`
ircd::gpt: Reorganize task options and control blocks. 2021-05-03 05:40:00 +02:00			`:model`
			`{`
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`model?: ircd::gpt::model::default_model`
			`}`
			`,seed`
			`{`
			`1234567890UL`
ircd::gpt: Reorganize task options and control blocks. 2021-05-03 05:40:00 +02:00			`}`
			`,limit`
			`{`
			`-1U`
			`}`
			`,top_k`
			`{`
			`2U`
			`}`
ircd::gpt: Add top_p lmhead selector, quantized for now. 2021-09-17 17:21:20 +02:00			`,top_p`
			`{`
			`90U`
			`}`
ircd::gpt: Add top N and target label result register control block. 2021-09-18 08:27:23 +02:00			`,top_n`
			`{`
			`16`
			`}`
			`,labels`
			`{`
			`0`
			`}`
			`,debug`
			`{`
			`0x01`
			`}`
ircd::gpt: Reorganize task options and control blocks. 2021-05-03 05:40:00 +02:00			`,context_tokens`
			`{`
			`1024U`
			`}`
			`,buffer_tokens`
			`{`
			`1024U`
			`}`
			`,embed_elems`
			`{`
			`768U`
			`}`
ircd::gpt::pipe: Optimize pipeline to cache attention state for generations. 2021-09-17 08:03:44 +02:00			`,attn_rank`
			`{`
			`12U`
			`}`
ircd::gpt: Reorganize task options and control blocks. 2021-05-03 05:40:00 +02:00			`,attn_mult`
			`{`
			`3U`
			`}`
			`,ffnn_mult`
			`{`
			`4U`
			`}`
			`,attn_elems`
			`{`
			`embed_elems * attn_mult`
			`}`
			`,ffnn_elems`
			`{`
			`embed_elems * ffnn_mult`
			`}`
			`,lanes`
			`{`
			`4U`
			`}`
ircd::gpt: Add layer count to model section of opts. 2022-01-23 20:02:22 +01:00			`,layers`
			`{`
			`12`
			`}`
ircd::gpt: Reorganize task options and control blocks. 2021-05-03 05:40:00 +02:00			`,embed_width`
			`{`
			`embed_elems / lanes`
			`}`
			`,attn_width`
			`{`
			`attn_elems / lanes`
			`}`
			`,attn_height`
			`{`
			`embed_elems / lanes`
			`}`
			`,ffnn_width`
			`{`
			`ffnn_elems / lanes`
			`}`
			`,ffnn_height`
			`{`
			`embed_elems / lanes`
			`}`
			`,logits`
			`{`
			`50257`
			`}`
			`,training_steps`
			`{`
			`250000`
			`}`
			`,validation_steps`
			`{`
			`5000`
			`}`
ircd::gpt: Split debug related; improve flush options; minor cleanup. 2021-05-14 14:50:45 +02:00			`,testing_steps`
			`{`
			`5000`
			`}`
ircd::gpt: Reorganize task options and control blocks. 2021-05-03 05:40:00 +02:00			`,alpha`
			`{`
			`0.001f`
			`}`
			`,beta`
			`{`
			`0.9f,`
			`0.999f,`
			`}`
			`,epsilon`
			`{`
			`0.000001`
			`}`
ircd::gpt: Add adaptive moment state to pipe model; move task prelim defs. 2021-04-17 20:53:50 +02:00			`{`
			`}`