mirror of
https://github.com/matrix-construct/construct
synced 2024-12-26 15:33:54 +01:00
ircd::gpt::vocab: Add token debug string tool.
This commit is contained in:
parent
33a1ffd4bf
commit
734948863f
2 changed files with 22 additions and 0 deletions
|
@ -38,4 +38,7 @@ namespace ircd::gpt::vocab
|
|||
|
||||
// Decode token values to build output text string.
|
||||
string_view detokenize(const mutable_buffer &out, const vector_view<const u16> &in);
|
||||
|
||||
// Other tools
|
||||
// Format a one-line, human-readable description of a single vocabulary
// entry into `buf` and return a view of the formatted text. `token` is
// the numeric token value, used as the index of the entry to describe.
string_view debug(const mutable_buffer &buf, const u16 token);
|
||||
}
|
||||
|
|
|
@ -155,6 +155,25 @@ ircd::gpt::vocab::init_merges()
|
|||
});
|
||||
}
|
||||
|
||||
/// Build a one-line debug description of a single vocabulary entry.
///
/// The line is formatted into `out` as "%5u %s [%32s]": the token index,
/// followed by the simd::print_mem() rendering of the entry, followed by
/// the simd::print_chr() rendering of the same entry inside brackets.
///
/// @param out  Destination buffer which receives the formatted line.
/// @param idx  Token value; used directly as the index into vocab::token.
/// @return     View of the text produced by fmt::sprintf.
ircd::string_view
ircd::gpt::vocab::debug(const mutable_buffer &out,
                        const u16 idx)
{
	// One per-thread scratch buffer for each of the two renderings, so the
	// intermediate strings do not consume space in the caller's buffer.
	thread_local char scratch[2][512];

	// Address the vocabulary table as an array of 16-byte vector entries.
	// NOTE(review): presumably each vocab::token row is exactly 16 bytes
	// and suitably aligned for u8x16 -- confirm against its declaration.
	const u8x16 *const table = reinterpret_cast<const u8x16 *>(vocab::token);
	const u8x16 &entry = table[idx];

	return string_view{fmt::sprintf
	{
		out, "%5u %s [%32s]",
		idx,
		simd::print_mem(scratch[0], entry),
		simd::print_chr(scratch[1], entry),
	}};
}
|
||||
|
||||
//
|
||||
// detokenize
|
||||
//
|
||||
|
|
Loading…
Reference in a new issue