update to unittesting (#1983)

* update to unittesting

Signed-off-by: Oleksii Kuchaiev <okuchaiev@nvidia.com>

* expanding unittesting

Signed-off-by: Oleksii Kuchaiev <okuchaiev@nvidia.com>

* Update test_megatron.py

Signed-off-by: ericharper <complex451@gmail.com>

* unskip export test

Signed-off-by: ericharper <complex451@gmail.com>

* try get test

Signed-off-by: ericharper <complex451@gmail.com>

* add check to megatron test to make sure it is in
our CI environment

Signed-off-by: Oleksii Kuchaiev <okuchaiev@nvidia.com>

Co-authored-by: Eric Harper <complex451@gmail.com>
This commit is contained in:
Oleksii Kuchaiev 2021-03-29 23:15:43 -07:00 committed by GitHub
parent 067c1f2a04
commit 9194f32d4b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 67 additions and 23 deletions

View file

@ -5,20 +5,31 @@ We do all of NeMo's development in the open. Contributions from NeMo community a
# Pull Requests (PR) Guidelines
**Send your PRs to the `main` branch**
1) Make sure your PR does one thing. Have a clear answer to "What does this PR do?".
2) Read General Principles and style guide below
3) Make sure unit tests pass on your machine
4) Make sure you sign your commits, e.g. use ``git commit -s`` when committing
5) Make sure all unit tests finish successfully before sending a PR: run ``pytest`` or (if your dev box does not have a GPU) ``pytest --cpu`` from NeMo's root folder
6) Send your PR and request a review
3) Make sure you sign your commits, e.g. use ``git commit -s`` when committing
4) Make sure all unit tests finish successfully before sending a PR: run ``pytest`` or (if your dev box does not have a GPU) ``pytest --cpu`` from NeMo's root folder
5) Send your PR and request a review
Send your PR to the `main` branch
## Unit tests
Quick tests (locally, while developing)
```
pytest
# If you don't have NVIDIA GPU do:
# pytest --cpu
```
Full tests, including pre-trained model downloads
```
pytest --with_downloads
```
Whom should you ask for review:
1. For changes to NeMo's core: @okuchaiev, @blisc, @titu1994, @tkornuta-nvidia, or @ericharper
1. For changes to NeMo's ASR collection: @okuchaiev, @titu1994, @redoctopus, @blisc, or @vsl9
1. For changes to NeMo's NLP collection: @ekmb, @yzhang123, @VahidooX, @vladgets, or @ericharper
1. For changes to NeMo's TTS collection: @blisc or @stasbel
## Whom should you ask for review:
1. For changes to NeMo's core: @ericharper, @titu1994, @blisc, or @okuchaiev
1. For changes to NeMo's ASR collection: @titu1994, @redoctopus, @jbalam-nv, or @okuchaiev
1. For changes to NeMo's NLP collection: @MaximumEntropy, @ericharper, @ekmb, @yzhang123, @VahidooX, @vladgets, or @okuchaiev
1. For changes to NeMo's TTS collection: @blisc, @stasbel, or @okuchaiev
Note that some people may self-assign to review your PR - in which case, please wait for them to add a review.
@ -27,7 +38,7 @@ Your pull requests must pass all checks and peer-review before they can be merg
# General principles
1. **User-oriented**: make it easy for end users, even at the cost of writing more code in the background
1. **Robust**: make it hard for users to make mistakes.
1. **Supports both training and inference**: if a module can only be used for training, write a companion module to be used during inference.
1. **Well-tested**: please add simple, fast unittests. Consider adding CI tests for end-to-end functionality.
1. **Reusable**: for every piece of code, think about how it can be reused in the future and make it easy to be reused.
1. **Readable**: code should be easy to read.
1. **Legal**: if you copy even one line of code from the Internet, make sure that its license is compatible with the license NeMo uses. Give credit and link back to the code.

4
Jenkinsfile vendored
View file

@ -54,7 +54,7 @@ pipeline {
stage('L0: Unit Tests GPU') {
steps {
sh 'pytest -m "unit and not skipduringci and not pleasefixme"'
sh 'pytest -m "not pleasefixme" --with_downloads'
}
}
@ -66,7 +66,7 @@ pipeline {
}
}
steps {
sh 'CUDA_VISIBLE_DEVICES="" pytest -m "unit and not pleasefixme" --cpu'
sh 'CUDA_VISIBLE_DEVICES="" pytest -m "not pleasefixme" --cpu --with_downloads'
}
}

View file

@ -40,30 +40,35 @@ class TestHuggingFace(TestCase):
pretrained_lm_models = nemo_nlp.modules.get_pretrained_lm_models_list()
self.assertTrue(len(pretrained_lm_models) > 0)
@pytest.mark.with_downloads()
@pytest.mark.unit
def test_get_pretrained_bert_model(self):
    """Load pretrained ``bert-base-uncased``, check the encoder class, and pass it to ``do_export``."""
    encoder = nemo_nlp.modules.get_lm_model(pretrained_model_name='bert-base-uncased')
    assert isinstance(encoder, nemo_nlp.modules.BertEncoder)
    do_export(encoder, "bert-base-uncased")
@pytest.mark.with_downloads()
@pytest.mark.unit
def test_get_pretrained_distilbert_model(self):
    """Load pretrained ``distilbert-base-uncased``, check the encoder class, and pass it to ``do_export``."""
    encoder = nemo_nlp.modules.get_lm_model(pretrained_model_name='distilbert-base-uncased')
    assert isinstance(encoder, nemo_nlp.modules.DistilBertEncoder)
    do_export(encoder, "distilbert-base-uncased")
@pytest.mark.with_downloads()
@pytest.mark.unit
def test_get_pretrained_roberta_model(self):
    """Load pretrained ``roberta-base``, check the encoder class, and pass it to ``do_export``."""
    encoder = nemo_nlp.modules.get_lm_model(pretrained_model_name='roberta-base')
    assert isinstance(encoder, nemo_nlp.modules.RobertaEncoder)
    # NOTE(review): the export name differs from the model name above - confirm this is intended.
    do_export(encoder, "roberta-base-uncased")
@pytest.mark.with_downloads()
@pytest.mark.unit
def test_get_pretrained_albert_model(self):
    """Load pretrained ``albert-base-v1``, check the encoder class, and pass it to ``do_export``."""
    encoder = nemo_nlp.modules.get_lm_model(pretrained_model_name='albert-base-v1')
    assert isinstance(encoder, nemo_nlp.modules.AlbertEncoder)
    do_export(encoder, "albert-base-v1")
@pytest.mark.with_downloads()
@pytest.mark.unit
def test_get_pretrained_chinese_bert_wwm_model(self):
model_name = 'hfl/chinese-bert-wwm'
@ -72,6 +77,7 @@ class TestHuggingFace(TestCase):
tokenizer = get_tokenizer(tokenizer_name=model_name)
assert isinstance(tokenizer, AutoTokenizer)
@pytest.mark.with_downloads()
@pytest.mark.unit
def test_get_pretrained_arabic_model(self):
model_name = 'asafaya/bert-base-arabic'

View file

@ -31,6 +31,14 @@ import nemo.collections.nlp as nemo_nlp
from nemo.core.classes import typecheck
def get_pretrained_bert_345m_uncased_model():
    """Return the pretrained ``megatron-bert-345m-uncased`` language model.

    The model is obtained through ``nemo_nlp.modules.get_lm_model`` and is
    moved to the GPU via ``.cuda()`` whenever CUDA is available.
    """
    encoder = nemo_nlp.modules.get_lm_model(pretrained_model_name="megatron-bert-345m-uncased")
    return encoder.cuda() if torch.cuda.is_available() else encoder
class TestMegatron(TestCase):
@pytest.mark.run_only_on('GPU')
@pytest.mark.unit
@ -38,33 +46,31 @@ class TestMegatron(TestCase):
pretrained_lm_models = nemo_nlp.modules.get_pretrained_lm_models_list()
self.assertTrue(len(pretrained_lm_models) > 0)
@pytest.mark.skipif(not os.path.exists('/home/TestData/nlp'), reason='Not a Jenkins machine')
@pytest.mark.with_downloads()
@pytest.mark.run_only_on('GPU')
@pytest.mark.unit
def test_get_pretrained_bert_345m_uncased_model(self):
model_name = "megatron-bert-345m-uncased"
model = nemo_nlp.modules.get_lm_model(pretrained_model_name=model_name)
if torch.cuda.is_available():
model = model.cuda()
def test_get_model(self):
model = get_pretrained_bert_345m_uncased_model()
assert isinstance(model, nemo_nlp.modules.MegatronBertEncoder)
typecheck.set_typecheck_enabled(enabled=False)
inp = model.input_example()
out = model.forward(*inp)
typecheck.set_typecheck_enabled(enabled=True)
self.model = model
@pytest.mark.run_only_on('GPU')
@pytest.mark.unit
@pytest.mark.skip('ONNX export is broken in PyTorch')
def test_onnx_export(self):
assert self.model
model = get_pretrained_bert_345m_uncased_model()
assert model
with tempfile.TemporaryDirectory() as tmpdir:
# Generate filename in the temporary directory.
# Test export.
self.model.export(os.path.join(tmpdir, "megatron.onnx"))
model.export(os.path.join(tmpdir, "megatron.onnx"))
if __name__ == "__main__":
t = TestMegatron()
t.test_get_pretrained_bert_345m_uncased_model()
t.test_get_model()

View file

@ -55,6 +55,7 @@ def data_exists(data_dir):
class TestPretrainedModelPerformance(TestCase):
@pytest.mark.with_downloads()
@pytest.mark.unit
@pytest.mark.run_only_on('GPU')
@pytest.mark.skipif(
@ -83,6 +84,7 @@ class TestPretrainedModelPerformance(TestCase):
@pytest.mark.skipif(
not data_exists('/home/TestData/nlp/token_classification_punctuation/fisher'), reason='Not a Jenkins machine'
)
@pytest.mark.with_downloads()
def test_punct_capit_with_distilbert(self):
data_dir = '/home/TestData/nlp/token_classification_punctuation/fisher'
model = models.PunctuationCapitalizationModel.from_pretrained("punctuation_en_distilbert")
@ -93,6 +95,7 @@ class TestPretrainedModelPerformance(TestCase):
assert abs(metrics['punct_f1'] - 52.4225) < 0.001
assert int(model.punct_class_report.total_examples) == 128
@pytest.mark.with_downloads()
@pytest.mark.unit
@pytest.mark.run_only_on('GPU')
@pytest.mark.skipif(

View file

@ -42,6 +42,11 @@ def pytest_addoption(parser):
action='store_true',
help="pass that argument to use local test data/skip downloading from URL/GitHub (DEFAULT: False)",
)
parser.addoption(
'--with_downloads',
action='store_true',
help="pass this argument to active tests which download models from the cloud.",
)
@pytest.fixture
@ -60,6 +65,15 @@ def run_only_on_device_fixture(request, device):
pytest.skip('skipped on this device: {}'.format(device))
@pytest.fixture(autouse=True)
def downloads_weights(request, device):
    """Skip tests marked ``with_downloads`` unless ``--with_downloads`` was given.

    Declared ``autouse`` so it is applied to every test; tests without the
    marker are left untouched.
    """
    is_download_test = request.node.get_closest_marker('with_downloads')
    # The option is only queried for marked tests (short-circuit evaluation).
    if is_download_test and not request.config.getoption("--with_downloads"):
        pytest.skip('To run this test, pass --with_downloads option. It will download (and cache) models from cloud.')
@pytest.fixture
def cleanup_local_folder():
# Asserts in fixture are not recommended, but I'd rather stop users from deleting expensive training runs

View file

@ -120,24 +120,28 @@ class TestSaveRestore:
return model_copy
@pytest.mark.with_downloads()
@pytest.mark.unit
def test_EncDecCTCModel(self):
    """Load pretrained ``QuartzNet15x5Base-En`` and run it through ``__test_restore_elsewhere``."""
    # TODO: Switch to using named configs because here we don't really care about weights
    quartznet = EncDecCTCModel.from_pretrained(model_name="QuartzNet15x5Base-En")
    decoder_attrs = {"decoder._feat_in", "decoder._num_classes"}
    self.__test_restore_elsewhere(model=quartznet, attr_for_eq_check=decoder_attrs)
@pytest.mark.with_downloads()
@pytest.mark.unit
def test_EncDecCTCModelBPE(self):
    """Load pretrained ``stt_en_citrinet_256`` and run it through ``__test_restore_elsewhere``."""
    # TODO: Switch to using named configs because here we don't really care about weights
    citrinet = EncDecCTCModelBPE.from_pretrained(model_name="stt_en_citrinet_256")
    decoder_attrs = {"decoder._feat_in", "decoder._num_classes"}
    self.__test_restore_elsewhere(model=citrinet, attr_for_eq_check=decoder_attrs)
@pytest.mark.with_downloads()
@pytest.mark.unit
def test_EncDecCTCModelBPE_v2(self):
    """Load pretrained ``stt_en_conformer_ctc_small`` and run it through ``__test_restore_elsewhere``."""
    # TODO: Switch to using named configs because here we don't really care about weights
    conformer = EncDecCTCModelBPE.from_pretrained(model_name="stt_en_conformer_ctc_small")
    decoder_attrs = {"decoder._feat_in", "decoder._num_classes"}
    self.__test_restore_elsewhere(model=conformer, attr_for_eq_check=decoder_attrs)
@pytest.mark.with_downloads()
@pytest.mark.unit
def test_PunctuationCapitalization(self):
# TODO: Switch to using named configs because here we don't really care about weights