From 483ce5ce9c1cea6e04353b6f5072312fc82f2c56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Sat, 16 Jan 2021 11:01:21 +0100 Subject: [PATCH 1/3] python3Packages.transformers: 4.1.1 -> 4.2.1 Changelog: https://github.com/huggingface/transformers/releases/tag/v4.2.0 https://github.com/huggingface/transformers/releases/tag/v4.2.1 Changes to the derivation: - Enable on Python 3.9. - Disable checks and use pythonImportsCheck instead. The list of excluded tests was getting silly. The vast majority of tests require internet access (to download models). I guess at this point we just have to accept that it is not practical to run the tests. --- .../python-modules/transformers/default.nix | 66 +++---------------- 1 file changed, 8 insertions(+), 58 deletions(-) diff --git a/pkgs/development/python-modules/transformers/default.nix b/pkgs/development/python-modules/transformers/default.nix index 743cbebea755..e2409c155c95 100644 --- a/pkgs/development/python-modules/transformers/default.nix +++ b/pkgs/development/python-modules/transformers/default.nix @@ -1,32 +1,28 @@ { buildPythonPackage , lib, stdenv , fetchFromGitHub -, isPy39 +, pythonOlder , cookiecutter , filelock +, importlib-metadata , regex , requests , numpy -, pandas -, parameterized , protobuf , sacremoses -, timeout-decorator , tokenizers , tqdm -, pytestCheckHook }: buildPythonPackage rec { pname = "transformers"; - version = "4.1.1"; - disabled = isPy39; + version = "4.2.1"; src = fetchFromGitHub { owner = "huggingface"; repo = pname; rev = "v${version}"; - sha256 = "1l1gxdsakjmzsgggypq45pnwm87brhlccjfzafs43460pz0wbd6k"; + sha256 = "0yf5s878i6v298wxm4cwkb33qyxz5bdr75jmsnldpdw4ml31c3nn"; }; propagatedBuildInputs = [ @@ -39,63 +35,17 @@ buildPythonPackage rec { sacremoses tokenizers tqdm - ]; + ] ++ stdenv.lib.optionals (pythonOlder "3.8") [ importlib-metadata ]; - checkInputs = [ - pandas - parameterized - pytestCheckHook - timeout-decorator - ]; + # Many tests require internet access. + doCheck = false; postPatch = '' substituteInPlace setup.py \ --replace "tokenizers == 0.9.4" "tokenizers" ''; - preCheck = '' - export HOME="$TMPDIR" - - # This test requires the `datasets` module to download test - # data. However, since we cannot download in the Nix sandbox - # and `dataset` is an optional dependency for transformers - # itself, we will just remove the tests files that import - # `dataset`. - rm tests/test_retrieval_rag.py - rm tests/test_trainer.py - ''; - - # We have to run from the main directory for the tests. However, - # letting pytest discover tests leads to errors. - pytestFlagsArray = [ "tests" ]; - - # Disable tests that require network access. - disabledTests = [ - "BlenderbotSmallTokenizerTest" - "Blenderbot3BTokenizerTests" - "GetFromCacheTests" - "TokenizationTest" - "TestTokenizationBart" - "test_all_tokenizers" - "test_batch_encoding_is_fast" - "test_batch_encoding_pickle" - "test_batch_encoding_word_to_tokens" - "test_config_from_model_shortcut" - "test_config_model_type_from_model_identifier" - "test_from_pretrained_use_fast_toggle" - "test_hf_api" - "test_outputs_can_be_shorter" - "test_outputs_not_longer_than_maxlen" - "test_padding_accepts_tensors" - "test_pretokenized_tokenizers" - "test_tokenizer_equivalence_en_de" - "test_tokenizer_from_model_type" - "test_tokenizer_from_model_type" - "test_tokenizer_from_pretrained" - "test_tokenizer_from_tokenizer_class" - "test_tokenizer_identifier_with_correct_config" - "test_tokenizer_identifier_non_existent" - ]; + pythonImportsCheck = [ "transformers" ]; meta = with lib; { homepage = "https://github.com/huggingface/transformers"; From 861aade89835c415266e1a2d7c3fced300ac2910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Sat, 23 Jan 2021 16:18:51 +0100 Subject: [PATCH 2/3] python3Packages.transformers: 4.2.1 -> 4.2.2 Changelog: https://github.com/huggingface/transformers/releases/tag/v4.2.2 --- pkgs/development/python-modules/transformers/default.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkgs/development/python-modules/transformers/default.nix b/pkgs/development/python-modules/transformers/default.nix index e2409c155c95..2eaf4176fea1 100644 --- a/pkgs/development/python-modules/transformers/default.nix +++ b/pkgs/development/python-modules/transformers/default.nix @@ -16,13 +16,13 @@ buildPythonPackage rec { pname = "transformers"; - version = "4.2.1"; + version = "4.2.2"; src = fetchFromGitHub { owner = "huggingface"; repo = pname; rev = "v${version}"; - sha256 = "0yf5s878i6v298wxm4cwkb33qyxz5bdr75jmsnldpdw4ml31c3nn"; + hash = "sha256-sBMCzEgYX6HQbzoEIYnmMdpYecCCsQjTdl2mO1Veu9M="; }; propagatedBuildInputs = [ From 7f840a1acf260ba7637eb0603fb4aaff16b5d2d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Sat, 23 Jan 2021 16:22:56 +0100 Subject: [PATCH 3/3] python3Packages.transformers: re-relax tokenizer bound --- pkgs/development/python-modules/transformers/default.nix | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkgs/development/python-modules/transformers/default.nix b/pkgs/development/python-modules/transformers/default.nix index 2eaf4176fea1..ee288f7626b8 100644 --- a/pkgs/development/python-modules/transformers/default.nix +++ b/pkgs/development/python-modules/transformers/default.nix @@ -41,8 +41,7 @@ buildPythonPackage rec { doCheck = false; postPatch = '' - substituteInPlace setup.py \ - --replace "tokenizers == 0.9.4" "tokenizers" + sed -ri 's/tokenizers==[0-9.]+/tokenizers/g' setup.py ''; pythonImportsCheck = [ "transformers" ];