0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-06-02 18:18:56 +02:00

ircd::gpt::vocab: Add token debug string tool.

This commit is contained in:
Jason Volk 2021-03-09 04:48:17 -08:00
parent 33a1ffd4bf
commit 734948863f
2 changed files with 22 additions and 0 deletions

View file

@ -38,4 +38,7 @@ namespace ircd::gpt::vocab
// Decode token values to build output text string.
string_view detokenize(const mutable_buffer &out, const vector_view<const u16> &in);
// Other tools
// Format a one-line debug description of a single token into `buf`: the
// token's numeric value followed by two renderings of its vocabulary entry
// (one from simd::print_mem, one from simd::print_chr). The result of the
// formatting is returned as a string_view.
string_view debug(const mutable_buffer &buf, const u16 token);
}

View file

@ -155,6 +155,25 @@ ircd::gpt::vocab::init_merges()
});
}
/// Compose a one-line debug string for the vocabulary entry at `idx`.
///
/// The line is formatted into `out` as "%5u %s [%32s]": the index, then the
/// entry as rendered by simd::print_mem, then the entry as rendered by
/// simd::print_chr inside brackets.
///
/// @param out  Destination buffer handed to fmt::sprintf.
/// @param idx  Index of the entry in vocab::token; no range check is
///             performed in this function.
/// @return     The fmt::sprintf result converted to a string_view.
ircd::string_view
ircd::gpt::vocab::debug(const mutable_buffer &out,
                        const u16 idx)
{
	// Per-thread scratch space; one 512-byte buffer for each rendering so
	// that the two results never share storage.
	thread_local char scratch[2][512];

	// View the token table as an array of 16-byte vectors.
	const auto *const table
	{
		reinterpret_cast<const u8x16 *>(vocab::token)
	};

	const auto mem
	{
		simd::print_mem(scratch[0], table[idx])
	};

	const auto chr
	{
		simd::print_chr(scratch[1], table[idx])
	};

	return string_view{fmt::sprintf
	{
		out, "%5u %s [%32s]", idx, mem, chr
	}};
}
//
// detokenize
//