python3Packages.tokenizers: 0.9.2 -> 0.9.4
Changelog: https://github.com/huggingface/tokenizers/releases/tag/python-v0.9.3 https://github.com/huggingface/tokenizers/releases/tag/python-v0.9.4
parent 5c737382f3
commit c67382b02f

1 changed file with 13 additions and 3 deletions
@@ -32,6 +32,14 @@ let
     url = "https://norvig.com/big.txt";
     sha256 = "0yz80icdly7na03cfpl0nfk5h3j3cam55rj486n03wph81ynq1ps";
   };
+  docPipelineTokenizer = fetchurl {
+    url = "https://s3.amazonaws.com/models.huggingface.co/bert/anthony/doc-pipeline/tokenizer.json";
+    hash = "sha256-i533xC8J5CDMNxBjo+p6avIM8UOcui8RmGAmK0GmfBc=";
+  };
+  docQuicktourTokenizer = fetchurl {
+    url = "https://s3.amazonaws.com/models.huggingface.co/bert/anthony/doc-quicktour/tokenizer.json";
+    hash = "sha256-ipY9d5DR5nxoO6kj7rItueZ9AO5wq9+Nzr6GuEIfIBI=";
+  };
   openaiVocab = fetchurl {
     url = "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-vocab.json";
     sha256 = "0y40gc9bixj5rxv674br1rxmxkd3ly29p80x1596h8yywwcrpx7x";
@@ -42,16 +50,16 @@ let
   };
 in rustPlatform.buildRustPackage rec {
   pname = "tokenizers";
-  version = "0.9.2";
+  version = "0.9.4";
 
   src = fetchFromGitHub {
     owner = "huggingface";
     repo = pname;
     rev = "python-v${version}";
-    sha256 = "0rsm1g5zfq3ygdb3s8v9xqqpgfzvvkc4n5ik3ahy8sw7pyjljb4m";
+    hash = "sha256-JXoH9yfhMIFg5qDY5zrF6iWb7XKugjMfk1NxSizfaWg=";
   };
 
-  cargoSha256 = "0yn699dq9hdjh7fyci99ni8mmd5qdhzrsi80grzgf5cch8g38rbi";
+  cargoSha256 = "sha256-u9qitrOxJSABs0VjwHUZgmw7VTQXNbp6l8fKKE/RQ7M=";
 
   sourceRoot = "source/bindings/python";
 
@@ -82,6 +90,8 @@ in rustPlatform.buildRustPackage rec {
       ln -s ${robertaMerges} roberta-base-merges.txt
       ln -s ${albertVocab} albert-base-v1-tokenizer.json
       ln -s ${bertVocab} bert-base-uncased-vocab.txt
+      ln -s ${docPipelineTokenizer} bert-wiki.json
+      ln -s ${docQuicktourTokenizer} tokenizer-wiki.json
      ln -s ${norvigBig} big.txt
       ln -s ${openaiVocab} openai-gpt-vocab.json
       ln -s ${openaiMerges} openai-gpt-merges.txt )
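For context, a bump like this is typically sanity-checked by building the updated attribute from a local nixpkgs checkout; the package's test suite runs as part of the build, which is why the new tokenizer.json fixtures above are symlinked into the test data directory. A minimal sketch, assuming a nixpkgs working tree and the attribute path from the commit title:

    nix-build -A python3Packages.tokenizers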