update to unittesting (#1983)

* update to unittesting Signed-off-by: Oleksii Kuchaiev <okuchaiev@nvidia.com> * expanding unittesting Signed-off-by: Oleksii Kuchaiev <okuchaiev@nvidia.com> * Update test_megatron.py Signed-off-by: ericharper <complex451@gmail.com> * unskip export test Signed-off-by: ericharper <complex451@gmail.com> * try get test Signed-off-by: ericharper <complex451@gmail.com> * add check to megatron test to make sure it is in ourt CI environment Signed-off-by: Oleksii Kuchaiev <okuchaiev@nvidia.com> Co-authored-by: Eric Harper <complex451@gmail.com>
2021-03-29 23:15:43 -07:00 · 2021-03-29 23:15:43 -07:00 · 9194f32d4b
parent 067c1f2a04
commit 9194f32d4b
7 changed files with 67 additions and 23 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -5,20 +5,31 @@ We do all of NeMo's development in the open. Contributions from NeMo community a

 # Pull Requests (PR) Guidelines

+**Send your PRs to the `main` branch**
+
 1) Make sure your PR does one thing. Have a clear answer to "What does this PR do?".
 2) Read General Principles and style guide below
-3) Make sure unittest pass on your machine
-4) Make sure you sign your commits. E.g. use ``git commit -s`` when before your commit
-5) Make sure all unittests finish successfully before sending PR ``pytest`` or (if yor dev box does not have GPU) ``pytest --cpu`` from NeMo's root folder
-6) Send your PR and request a review
+3) Make sure you sign your commits. E.g. use ``git commit -s`` when before your commit
+4) Make sure all unittests finish successfully before sending PR ``pytest`` or (if yor dev box does not have GPU) ``pytest --cpu`` from NeMo's root folder
+5) Send your PR and request a review

-Send your PR to the `main` branch
+## Unit tests
+Quick tests (locally, while developing)
+```
+pytest
+# If you don't have NVIDIA GPU do:
+# pytest --cpu
+```
+Full tests, including pre-trained model downloads
+```
+pytest --with_downloads
+```

-Whom should you ask for review:
-1. For changes to NeMo's core: @okuchaiev, @blisc, @titu1994, @tkornuta-nvidia, or @ericharper
-1. For changes to NeMo's ASR collection: @okuchaiev, @titu1994, @redoctopus, @blisc, or @vsl9
-1. For changes to NeMo's NLP collection: @ekmb, @yzhang123, @VahidooX, @vladgets, or @ericharper
-1. For changes to NeMo's TTS collection: @blisc or @stasbel
+## Whom should you ask for review:
+1. For changes to NeMo's core: @ericharper, @titu1994, @blisc, or @okuchaiev  
+1. For changes to NeMo's ASR collection: @titu1994, @redoctopus, @jbalam-nv, or @okuchaiev
+1. For changes to NeMo's NLP collection: @MaximumEntropy, @ericharper, @ekmb, @yzhang123, @VahidooX, @vladgets, or @okuchaiev 
+1. For changes to NeMo's TTS collection: @blisc or @stasbel, or @okuchaiev

 Note that some people may self-assign to review your PR - in which case, please wait for them to add a review.

@ -27,7 +38,7 @@ Your  pull requests must pass all checks and peer-review before they can be merg
 # General principles
 1. **User-oriented**: make it easy for end users, even at the cost of writing more code in the background
 1. **Robust**: make it hard for users to make mistakes.
-1. **Supporting of both training and inferencing**: if a module can only be used for training, write a companion module to be used during inference.
+1. **Well-tested**: please add simple, fast unittests. Consider adding CI tests for end-to-end functionality.
 1. **Reusable**: for every piece of code, think about how it can be reused in the future and make it easy to be reused.
 1. **Readable**: code should be easier to read.
 1. **Legal**: if you copy even one line of code from the Internet, make sure that the code allows the license that NeMo supports. Give credit and link back to the code.
--- a/4
+++ b/4
@ -54,7 +54,7 @@ pipeline {

    stage('L0: Unit Tests GPU') {
      steps {
-        sh 'pytest -m "unit and not skipduringci and not pleasefixme"'
+        sh 'pytest -m "not pleasefixme" --with_downloads'
      }
    }

@ -66,7 +66,7 @@ pipeline {
        }
      }
      steps {
-        sh 'CUDA_VISIBLE_DEVICES="" pytest -m "unit and not pleasefixme" --cpu'
+        sh 'CUDA_VISIBLE_DEVICES="" pytest -m "not pleasefixme" --cpu --with_downloads'
      }
    }

--- a/tests/collections/nlp/test_huggingface.py
+++ b/tests/collections/nlp/test_huggingface.py
@ -40,30 +40,35 @@ class TestHuggingFace(TestCase):
        pretrained_lm_models = nemo_nlp.modules.get_pretrained_lm_models_list()
        self.assertTrue(len(pretrained_lm_models) > 0)

+    @pytest.mark.with_downloads()
    @pytest.mark.unit
    def test_get_pretrained_bert_model(self):
        model = nemo_nlp.modules.get_lm_model(pretrained_model_name='bert-base-uncased')
        assert isinstance(model, nemo_nlp.modules.BertEncoder)
        do_export(model, "bert-base-uncased")

+    @pytest.mark.with_downloads()
    @pytest.mark.unit
    def test_get_pretrained_distilbert_model(self):
        model = nemo_nlp.modules.get_lm_model(pretrained_model_name='distilbert-base-uncased')
        assert isinstance(model, nemo_nlp.modules.DistilBertEncoder)
        do_export(model, "distilbert-base-uncased")

+    @pytest.mark.with_downloads()
    @pytest.mark.unit
    def test_get_pretrained_roberta_model(self):
        model = nemo_nlp.modules.get_lm_model(pretrained_model_name='roberta-base')
        assert isinstance(model, nemo_nlp.modules.RobertaEncoder)
        do_export(model, "roberta-base-uncased")

+    @pytest.mark.with_downloads()
    @pytest.mark.unit
    def test_get_pretrained_albert_model(self):
        model = nemo_nlp.modules.get_lm_model(pretrained_model_name='albert-base-v1')
        assert isinstance(model, nemo_nlp.modules.AlbertEncoder)
        do_export(model, "albert-base-v1")

+    @pytest.mark.with_downloads()
    @pytest.mark.unit
    def test_get_pretrained_chinese_bert_wwm_model(self):
        model_name = 'hfl/chinese-bert-wwm'
@ -72,6 +77,7 @@ class TestHuggingFace(TestCase):
        tokenizer = get_tokenizer(tokenizer_name=model_name)
        assert isinstance(tokenizer, AutoTokenizer)

+    @pytest.mark.with_downloads()
    @pytest.mark.unit
    def test_get_pretrained_arabic_model(self):
        model_name = 'asafaya/bert-base-arabic'
--- a/tests/collections/nlp/test_megatron.py
+++ b/tests/collections/nlp/test_megatron.py
@ -31,6 +31,14 @@ import nemo.collections.nlp as nemo_nlp
 from nemo.core.classes import typecheck


+def get_pretrained_bert_345m_uncased_model():
+    model_name = "megatron-bert-345m-uncased"
+    model = nemo_nlp.modules.get_lm_model(pretrained_model_name=model_name)
+    if torch.cuda.is_available():
+        model = model.cuda()
+    return model
+
+
 class TestMegatron(TestCase):
    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
@ -38,33 +46,31 @@ class TestMegatron(TestCase):
        pretrained_lm_models = nemo_nlp.modules.get_pretrained_lm_models_list()
        self.assertTrue(len(pretrained_lm_models) > 0)

+    @pytest.mark.skipif(not os.path.exists('/home/TestData/nlp'), reason='Not a Jenkins machine')
+    @pytest.mark.with_downloads()
    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
-    def test_get_pretrained_bert_345m_uncased_model(self):
-        model_name = "megatron-bert-345m-uncased"
-        model = nemo_nlp.modules.get_lm_model(pretrained_model_name=model_name)
-        if torch.cuda.is_available():
-            model = model.cuda()
-
+    def test_get_model(self):
+        model = get_pretrained_bert_345m_uncased_model()
        assert isinstance(model, nemo_nlp.modules.MegatronBertEncoder)

        typecheck.set_typecheck_enabled(enabled=False)
        inp = model.input_example()
        out = model.forward(*inp)
        typecheck.set_typecheck_enabled(enabled=True)
-        self.model = model

    @pytest.mark.run_only_on('GPU')
    @pytest.mark.unit
    @pytest.mark.skip('ONNX export is broken in PyTorch')
    def test_onnx_export(self):
-        assert self.model
+        model = get_pretrained_bert_345m_uncased_model()
+        assert model
        with tempfile.TemporaryDirectory() as tmpdir:
            # Generate filename in the temporary directory.
            # Test export.
-            self.model.export(os.path.join(tmpdir, "megatron.onnx"))
+            model.export(os.path.join(tmpdir, "megatron.onnx"))


 if __name__ == "__main__":
    t = TestMegatron()
-    t.test_get_pretrained_bert_345m_uncased_model()
+    t.test_get_model()
--- a/tests/collections/nlp/test_pretrained_models_performance.py
+++ b/tests/collections/nlp/test_pretrained_models_performance.py
@ -55,6 +55,7 @@ def data_exists(data_dir):


 class TestPretrainedModelPerformance(TestCase):
+    @pytest.mark.with_downloads()
    @pytest.mark.unit
    @pytest.mark.run_only_on('GPU')
    @pytest.mark.skipif(
@ -83,6 +84,7 @@ class TestPretrainedModelPerformance(TestCase):
    @pytest.mark.skipif(
        not data_exists('/home/TestData/nlp/token_classification_punctuation/fisher'), reason='Not a Jenkins machine'
    )
+    @pytest.mark.with_downloads()
    def test_punct_capit_with_distilbert(self):
        data_dir = '/home/TestData/nlp/token_classification_punctuation/fisher'
        model = models.PunctuationCapitalizationModel.from_pretrained("punctuation_en_distilbert")
@ -93,6 +95,7 @@ class TestPretrainedModelPerformance(TestCase):
        assert abs(metrics['punct_f1'] - 52.4225) < 0.001
        assert int(model.punct_class_report.total_examples) == 128

+    @pytest.mark.with_downloads()
    @pytest.mark.unit
    @pytest.mark.run_only_on('GPU')
    @pytest.mark.skipif(
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -42,6 +42,11 @@ def pytest_addoption(parser):
        action='store_true',
        help="pass that argument to use local test data/skip downloading from URL/GitHub (DEFAULT: False)",
    )
+    parser.addoption(
+        '--with_downloads',
+        action='store_true',
+        help="pass this argument to active tests which download models from the cloud.",
+    )


@pytest.fixture
@ -60,6 +65,15 @@ def run_only_on_device_fixture(request, device):
            pytest.skip('skipped on this device: {}'.format(device))


+@pytest.fixture(autouse=True)
+def downloads_weights(request, device):
+    if request.node.get_closest_marker('with_downloads'):
+        if not request.config.getoption("--with_downloads"):
+            pytest.skip(
+                'To run this test, pass --with_downloads option. It will download (and cache) models from cloud.'
+            )
+
+
@pytest.fixture
 def cleanup_local_folder():
    # Asserts in fixture are not recommended, but I'd rather stop users from deleting expensive training runs
--- a/tests/core/test_save_restore.py
+++ b/tests/core/test_save_restore.py
@ -120,24 +120,28 @@ class TestSaveRestore:

            return model_copy

+    @pytest.mark.with_downloads()
    @pytest.mark.unit
    def test_EncDecCTCModel(self):
        # TODO: Switch to using named configs because here we don't really care about weights
        qn = EncDecCTCModel.from_pretrained(model_name="QuartzNet15x5Base-En")
        self.__test_restore_elsewhere(model=qn, attr_for_eq_check=set(["decoder._feat_in", "decoder._num_classes"]))

+    @pytest.mark.with_downloads()
    @pytest.mark.unit
    def test_EncDecCTCModelBPE(self):
        # TODO: Switch to using named configs because here we don't really care about weights
        cn = EncDecCTCModelBPE.from_pretrained(model_name="stt_en_citrinet_256")
        self.__test_restore_elsewhere(model=cn, attr_for_eq_check=set(["decoder._feat_in", "decoder._num_classes"]))

+    @pytest.mark.with_downloads()
    @pytest.mark.unit
    def test_EncDecCTCModelBPE_v2(self):
        # TODO: Switch to using named configs because here we don't really care about weights
        cn = EncDecCTCModelBPE.from_pretrained(model_name="stt_en_conformer_ctc_small")
        self.__test_restore_elsewhere(model=cn, attr_for_eq_check=set(["decoder._feat_in", "decoder._num_classes"]))

+    @pytest.mark.with_downloads()
    @pytest.mark.unit
    def test_PunctuationCapitalization(self):
        # TODO: Switch to using named configs because here we don't really care about weights