update to unittesting (#1983)
* update to unittesting Signed-off-by: Oleksii Kuchaiev <okuchaiev@nvidia.com> * expanding unittesting Signed-off-by: Oleksii Kuchaiev <okuchaiev@nvidia.com> * Update test_megatron.py Signed-off-by: ericharper <complex451@gmail.com> * unskip export test Signed-off-by: ericharper <complex451@gmail.com> * try get test Signed-off-by: ericharper <complex451@gmail.com> * add check to megatron test to make sure it is in ourt CI environment Signed-off-by: Oleksii Kuchaiev <okuchaiev@nvidia.com> Co-authored-by: Eric Harper <complex451@gmail.com>
This commit is contained in:
parent
067c1f2a04
commit
9194f32d4b
|
@ -5,20 +5,31 @@ We do all of NeMo's development in the open. Contributions from NeMo community a
|
|||
|
||||
# Pull Requests (PR) Guidelines
|
||||
|
||||
**Send your PRs to the `main` branch**
|
||||
|
||||
1) Make sure your PR does one thing. Have a clear answer to "What does this PR do?".
|
||||
2) Read General Principles and style guide below
|
||||
3) Make sure unit tests pass on your machine
|
||||
4) Make sure you sign your commits. E.g. use ``git commit -s`` when you commit
|
||||
5) Make sure all unit tests finish successfully before sending PR: run ``pytest`` or (if your dev box does not have a GPU) ``pytest --cpu`` from NeMo's root folder
|
||||
6) Send your PR and request a review
|
||||
3) Make sure you sign your commits. E.g. use ``git commit -s`` when you commit
|
||||
4) Make sure all unit tests finish successfully before sending PR: run ``pytest`` or (if your dev box does not have a GPU) ``pytest --cpu`` from NeMo's root folder
|
||||
5) Send your PR and request a review
|
||||
|
||||
Send your PR to the `main` branch
|
||||
## Unit tests
|
||||
Quick tests (locally, while developing)
|
||||
```
|
||||
pytest
|
||||
# If you don't have NVIDIA GPU do:
|
||||
# pytest --cpu
|
||||
```
|
||||
Full tests, including pre-trained model downloads
|
||||
```
|
||||
pytest --with_downloads
|
||||
```
|
||||
|
||||
Whom should you ask for review:
|
||||
1. For changes to NeMo's core: @okuchaiev, @blisc, @titu1994, @tkornuta-nvidia, or @ericharper
|
||||
1. For changes to NeMo's ASR collection: @okuchaiev, @titu1994, @redoctopus, @blisc, or @vsl9
|
||||
1. For changes to NeMo's NLP collection: @ekmb, @yzhang123, @VahidooX, @vladgets, or @ericharper
|
||||
1. For changes to NeMo's TTS collection: @blisc or @stasbel
|
||||
## Whom should you ask for review:
|
||||
1. For changes to NeMo's core: @ericharper, @titu1994, @blisc, or @okuchaiev
|
||||
1. For changes to NeMo's ASR collection: @titu1994, @redoctopus, @jbalam-nv, or @okuchaiev
|
||||
1. For changes to NeMo's NLP collection: @MaximumEntropy, @ericharper, @ekmb, @yzhang123, @VahidooX, @vladgets, or @okuchaiev
|
||||
1. For changes to NeMo's TTS collection: @blisc or @stasbel, or @okuchaiev
|
||||
|
||||
Note that some people may self-assign to review your PR - in which case, please wait for them to add a review.
|
||||
|
||||
|
@ -27,7 +38,7 @@ Your pull requests must pass all checks and peer-review before they can be merg
|
|||
# General principles
|
||||
1. **User-oriented**: make it easy for end users, even at the cost of writing more code in the background
|
||||
1. **Robust**: make it hard for users to make mistakes.
|
||||
1. **Supporting of both training and inferencing**: if a module can only be used for training, write a companion module to be used during inference.
|
||||
1. **Well-tested**: please add simple, fast unittests. Consider adding CI tests for end-to-end functionality.
|
||||
1. **Reusable**: for every piece of code, think about how it can be reused in the future and make it easy to be reused.
|
||||
1. **Readable**: code should be easier to read.
|
||||
1. **Legal**: if you copy even one line of code from the Internet, make sure that the code allows the license that NeMo supports. Give credit and link back to the code.
|
||||
|
|
4
Jenkinsfile
vendored
4
Jenkinsfile
vendored
|
@ -54,7 +54,7 @@ pipeline {
|
|||
|
||||
stage('L0: Unit Tests GPU') {
|
||||
steps {
|
||||
sh 'pytest -m "unit and not skipduringci and not pleasefixme"'
|
||||
sh 'pytest -m "not pleasefixme" --with_downloads'
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -66,7 +66,7 @@ pipeline {
|
|||
}
|
||||
}
|
||||
steps {
|
||||
sh 'CUDA_VISIBLE_DEVICES="" pytest -m "unit and not pleasefixme" --cpu'
|
||||
sh 'CUDA_VISIBLE_DEVICES="" pytest -m "not pleasefixme" --cpu --with_downloads'
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -40,30 +40,35 @@ class TestHuggingFace(TestCase):
|
|||
pretrained_lm_models = nemo_nlp.modules.get_pretrained_lm_models_list()
|
||||
self.assertTrue(len(pretrained_lm_models) > 0)
|
||||
|
||||
@pytest.mark.with_downloads()
|
||||
@pytest.mark.unit
|
||||
def test_get_pretrained_bert_model(self):
|
||||
model = nemo_nlp.modules.get_lm_model(pretrained_model_name='bert-base-uncased')
|
||||
assert isinstance(model, nemo_nlp.modules.BertEncoder)
|
||||
do_export(model, "bert-base-uncased")
|
||||
|
||||
@pytest.mark.with_downloads()
|
||||
@pytest.mark.unit
|
||||
def test_get_pretrained_distilbert_model(self):
|
||||
model = nemo_nlp.modules.get_lm_model(pretrained_model_name='distilbert-base-uncased')
|
||||
assert isinstance(model, nemo_nlp.modules.DistilBertEncoder)
|
||||
do_export(model, "distilbert-base-uncased")
|
||||
|
||||
@pytest.mark.with_downloads()
|
||||
@pytest.mark.unit
|
||||
def test_get_pretrained_roberta_model(self):
|
||||
model = nemo_nlp.modules.get_lm_model(pretrained_model_name='roberta-base')
|
||||
assert isinstance(model, nemo_nlp.modules.RobertaEncoder)
|
||||
do_export(model, "roberta-base-uncased")
|
||||
|
||||
@pytest.mark.with_downloads()
|
||||
@pytest.mark.unit
|
||||
def test_get_pretrained_albert_model(self):
|
||||
model = nemo_nlp.modules.get_lm_model(pretrained_model_name='albert-base-v1')
|
||||
assert isinstance(model, nemo_nlp.modules.AlbertEncoder)
|
||||
do_export(model, "albert-base-v1")
|
||||
|
||||
@pytest.mark.with_downloads()
|
||||
@pytest.mark.unit
|
||||
def test_get_pretrained_chinese_bert_wwm_model(self):
|
||||
model_name = 'hfl/chinese-bert-wwm'
|
||||
|
@ -72,6 +77,7 @@ class TestHuggingFace(TestCase):
|
|||
tokenizer = get_tokenizer(tokenizer_name=model_name)
|
||||
assert isinstance(tokenizer, AutoTokenizer)
|
||||
|
||||
@pytest.mark.with_downloads()
|
||||
@pytest.mark.unit
|
||||
def test_get_pretrained_arabic_model(self):
|
||||
model_name = 'asafaya/bert-base-arabic'
|
||||
|
|
|
@ -31,6 +31,14 @@ import nemo.collections.nlp as nemo_nlp
|
|||
from nemo.core.classes import typecheck
|
||||
|
||||
|
||||
def get_pretrained_bert_345m_uncased_model():
|
||||
model_name = "megatron-bert-345m-uncased"
|
||||
model = nemo_nlp.modules.get_lm_model(pretrained_model_name=model_name)
|
||||
if torch.cuda.is_available():
|
||||
model = model.cuda()
|
||||
return model
|
||||
|
||||
|
||||
class TestMegatron(TestCase):
|
||||
@pytest.mark.run_only_on('GPU')
|
||||
@pytest.mark.unit
|
||||
|
@ -38,33 +46,31 @@ class TestMegatron(TestCase):
|
|||
pretrained_lm_models = nemo_nlp.modules.get_pretrained_lm_models_list()
|
||||
self.assertTrue(len(pretrained_lm_models) > 0)
|
||||
|
||||
@pytest.mark.skipif(not os.path.exists('/home/TestData/nlp'), reason='Not a Jenkins machine')
|
||||
@pytest.mark.with_downloads()
|
||||
@pytest.mark.run_only_on('GPU')
|
||||
@pytest.mark.unit
|
||||
def test_get_pretrained_bert_345m_uncased_model(self):
|
||||
model_name = "megatron-bert-345m-uncased"
|
||||
model = nemo_nlp.modules.get_lm_model(pretrained_model_name=model_name)
|
||||
if torch.cuda.is_available():
|
||||
model = model.cuda()
|
||||
|
||||
def test_get_model(self):
|
||||
model = get_pretrained_bert_345m_uncased_model()
|
||||
assert isinstance(model, nemo_nlp.modules.MegatronBertEncoder)
|
||||
|
||||
typecheck.set_typecheck_enabled(enabled=False)
|
||||
inp = model.input_example()
|
||||
out = model.forward(*inp)
|
||||
typecheck.set_typecheck_enabled(enabled=True)
|
||||
self.model = model
|
||||
|
||||
@pytest.mark.run_only_on('GPU')
|
||||
@pytest.mark.unit
|
||||
@pytest.mark.skip('ONNX export is broken in PyTorch')
|
||||
def test_onnx_export(self):
|
||||
assert self.model
|
||||
model = get_pretrained_bert_345m_uncased_model()
|
||||
assert model
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
# Generate filename in the temporary directory.
|
||||
# Test export.
|
||||
self.model.export(os.path.join(tmpdir, "megatron.onnx"))
|
||||
model.export(os.path.join(tmpdir, "megatron.onnx"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
t = TestMegatron()
|
||||
t.test_get_pretrained_bert_345m_uncased_model()
|
||||
t.test_get_model()
|
||||
|
|
|
@ -55,6 +55,7 @@ def data_exists(data_dir):
|
|||
|
||||
|
||||
class TestPretrainedModelPerformance(TestCase):
|
||||
@pytest.mark.with_downloads()
|
||||
@pytest.mark.unit
|
||||
@pytest.mark.run_only_on('GPU')
|
||||
@pytest.mark.skipif(
|
||||
|
@ -83,6 +84,7 @@ class TestPretrainedModelPerformance(TestCase):
|
|||
@pytest.mark.skipif(
|
||||
not data_exists('/home/TestData/nlp/token_classification_punctuation/fisher'), reason='Not a Jenkins machine'
|
||||
)
|
||||
@pytest.mark.with_downloads()
|
||||
def test_punct_capit_with_distilbert(self):
|
||||
data_dir = '/home/TestData/nlp/token_classification_punctuation/fisher'
|
||||
model = models.PunctuationCapitalizationModel.from_pretrained("punctuation_en_distilbert")
|
||||
|
@ -93,6 +95,7 @@ class TestPretrainedModelPerformance(TestCase):
|
|||
assert abs(metrics['punct_f1'] - 52.4225) < 0.001
|
||||
assert int(model.punct_class_report.total_examples) == 128
|
||||
|
||||
@pytest.mark.with_downloads()
|
||||
@pytest.mark.unit
|
||||
@pytest.mark.run_only_on('GPU')
|
||||
@pytest.mark.skipif(
|
||||
|
|
|
@ -42,6 +42,11 @@ def pytest_addoption(parser):
|
|||
action='store_true',
|
||||
help="pass that argument to use local test data/skip downloading from URL/GitHub (DEFAULT: False)",
|
||||
)
|
||||
parser.addoption(
|
||||
'--with_downloads',
|
||||
action='store_true',
|
||||
help="pass this argument to active tests which download models from the cloud.",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
@ -60,6 +65,15 @@ def run_only_on_device_fixture(request, device):
|
|||
pytest.skip('skipped on this device: {}'.format(device))
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def downloads_weights(request, device):
|
||||
if request.node.get_closest_marker('with_downloads'):
|
||||
if not request.config.getoption("--with_downloads"):
|
||||
pytest.skip(
|
||||
'To run this test, pass --with_downloads option. It will download (and cache) models from cloud.'
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def cleanup_local_folder():
|
||||
# Asserts in fixture are not recommended, but I'd rather stop users from deleting expensive training runs
|
||||
|
|
|
@ -120,24 +120,28 @@ class TestSaveRestore:
|
|||
|
||||
return model_copy
|
||||
|
||||
@pytest.mark.with_downloads()
|
||||
@pytest.mark.unit
|
||||
def test_EncDecCTCModel(self):
|
||||
# TODO: Switch to using named configs because here we don't really care about weights
|
||||
qn = EncDecCTCModel.from_pretrained(model_name="QuartzNet15x5Base-En")
|
||||
self.__test_restore_elsewhere(model=qn, attr_for_eq_check=set(["decoder._feat_in", "decoder._num_classes"]))
|
||||
|
||||
@pytest.mark.with_downloads()
|
||||
@pytest.mark.unit
|
||||
def test_EncDecCTCModelBPE(self):
|
||||
# TODO: Switch to using named configs because here we don't really care about weights
|
||||
cn = EncDecCTCModelBPE.from_pretrained(model_name="stt_en_citrinet_256")
|
||||
self.__test_restore_elsewhere(model=cn, attr_for_eq_check=set(["decoder._feat_in", "decoder._num_classes"]))
|
||||
|
||||
@pytest.mark.with_downloads()
|
||||
@pytest.mark.unit
|
||||
def test_EncDecCTCModelBPE_v2(self):
|
||||
# TODO: Switch to using named configs because here we don't really care about weights
|
||||
cn = EncDecCTCModelBPE.from_pretrained(model_name="stt_en_conformer_ctc_small")
|
||||
self.__test_restore_elsewhere(model=cn, attr_for_eq_check=set(["decoder._feat_in", "decoder._num_classes"]))
|
||||
|
||||
@pytest.mark.with_downloads()
|
||||
@pytest.mark.unit
|
||||
def test_PunctuationCapitalization(self):
|
||||
# TODO: Switch to using named configs because here we don't really care about weights
|
||||
|
|
Loading…
Reference in a new issue