NeMo/Jenkinsfile
Samuel Kriman b7a175b7b9
Self-supervised pre-training for speech models (#3139)
* self-supervised training

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* test

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* remove imports

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* fix

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* sort imports

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* fix audio_to_text

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* manifest handle no text

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* loss init

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* style

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* remove tokenizer from config

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* config changes

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* remove hydra import

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* always spec augment

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* fixes

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* copyright

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* fix cosine sim

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* fix cosine sim

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* fix cosine sim

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* changes based on comments

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* changes based on comments

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* configs

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* name fix

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* ci config changes

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* renamed to num_negatives

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* minor changes

Signed-off-by: sam1373 <samuelkriman@gmail.com>

* name changes, type annotations

Signed-off-by: sam1373 <samuelkriman@gmail.com>

Co-authored-by: Yang Zhang <yzhang123@users.noreply.github.com>
2021-11-10 15:33:11 -08:00

1775 lines
75 KiB
Groovy

pipeline {
agent {
docker {
image 'nvcr.io/nvidia/pytorch:21.10-py3'
args '--device=/dev/nvidia0 --gpus all --user 0:128 -v /home/TestData:/home/TestData -v $HOME/.cache/torch:/root/.cache/torch --shm-size=8g'
}
}
options {
timeout(time: 2, unit: 'HOURS')
disableConcurrentBuilds()
}
stages {
stage('PyTorch version') {
steps {
sh 'python -c "import torch; print(torch.__version__)"'
sh 'python -c "import torchvision; print(torchvision.__version__)"'
}
}
stage('Install test requirements') {
steps {
sh 'apt-get update && apt-get install -y bc && pip install -r requirements/requirements_test.txt'
}
}
stage('Code formatting checks') {
steps {
sh 'python setup.py style'
}
}
stage('Copyright Headers check') {
steps {
sh 'python tests/check_copyright_header.py --dir .'
}
}
stage('Torch TTS unit tests') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
steps {
sh 'pip install ".[torch_tts]"'
sh 'pip list'
sh 'test $(pip list | grep -c lightning) -eq 0'
sh 'test $(pip list | grep -c omegaconf) -eq 0'
sh 'test $(pip list | grep -c hydra) -eq 0'
sh 'pytest -m "torch_tts" --cpu tests/collections/tts/test_torch_tts.py --relax_numba_compat'
}
}
stage('NeMo Installation') {
steps {
sh './reinstall.sh release'
}
}
stage('PyTorch Lightning version') {
steps {
sh 'python -c "import pytorch_lightning; print(pytorch_lightning.__version__)"'
}
}
stage('PyTorch Lightning DDP Checks') {
steps {
sh 'CUDA_VISIBLE_DEVICES="0,1" python "tests/core_ptl/check_for_ranks.py"'
}
}
stage('Basic Import Checks') {
steps {
sh 'python -c "import nemo.collections.asr as nemo_asr"'
sh 'python -c "import nemo.collections.nlp as nemo_nlp"'
sh 'python -c "import nemo.collections.tts as nemo_tts"'
}
}
stage('L0: Unit Tests GPU') {
steps {
sh 'NEMO_NUMBA_MINVER=0.55 pytest -m "not pleasefixme and not torch_tts" --with_downloads'
}
}
stage('L0: Unit Tests CPU') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
steps {
sh 'CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.55 pytest -m "not pleasefixme and not torch_tts" --cpu --with_downloads --relax_numba_compat'
}
}
stage('L0: TN/ITN Tests CPU') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('En TN grammars') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py "1" --cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13'
}
}
stage('En ITN grammars') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --language en "twenty" --cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13'
}
}
stage('German ITN') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --language de "zwanzig" --cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13'
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/de -m "not pleasefixme" --cpu --tn_cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13'
}
}
stage('Spanish ITN') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/es -m "not pleasefixme" --cpu --tn_cache_dir /home/TestData/nlp/text_norm/ci/grammars/Spanish/9-13'
}
}
stage('Create En non-deterministic TN & Run all En TN/ITN tests') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize_with_audio.py --text "\$.01" --n_tagged 2 --cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13'
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/en/ -m "not pleasefixme" --cpu --tn_cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13'
}
}
stage('Run Ru ITN and non-deterministic TN & Run all Ru ITN tests') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py "двадцать" --cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13 --language ru'
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize_with_audio.py --text "25" --n_tagged 2 --cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13 --language ru'
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/ru/test_ru_inverse_normalization.py -m "not pleasefixme" --cpu --tn_cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13'
}
}
}
}
stage('L2: NeMo text processing') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('L2: Eng TN') {
steps {
sh 'cd tools/text_processing_deployment && python pynini_export.py --output=/home/TestData/nlp/text_norm/output/ --grammars=tn_grammars --cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13 --language=en && ls -R /home/TestData/nlp/text_norm/output/ && echo ".far files created "|| exit 1'
sh 'cd nemo_text_processing/text_normalization/ && python run_predict.py --input=/home/TestData/nlp/text_norm/ci/test.txt --input_case="lower_cased" --language=en --output=/home/TestData/nlp/text_norm/output/test.pynini.txt --verbose'
sh 'cmp --silent /home/TestData/nlp/text_norm/output/test.pynini.txt /home/TestData/nlp/text_norm/ci/test_goal_py.txt || exit 1'
sh 'rm -rf /home/TestData/nlp/text_norm/output/*'
}
}
stage('L2: Eng ITN export') {
steps {
sh 'cd tools/text_processing_deployment && python pynini_export.py --output=/home/TestData/nlp/text_denorm/output/ --grammars=itn_grammars --cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13 --language=en && ls -R /home/TestData/nlp/text_denorm/output/ && echo ".far files created "|| exit 1'
sh 'cd nemo_text_processing/inverse_text_normalization/ && python run_predict.py --input=/home/TestData/nlp/text_denorm/ci/test.txt --language=en --output=/home/TestData/nlp/text_denorm/output/test.pynini.txt --verbose'
sh 'cmp --silent /home/TestData/nlp/text_denorm/output/test.pynini.txt /home/TestData/nlp/text_denorm/ci/test_goal_py.txt || exit 1'
sh 'rm -rf /home/TestData/nlp/text_denorm/output/*'
}
}
// stage('L2: TN with Audio (audio and raw text)') {
// steps {
// sh 'cd nemo_text_processing/text_normalization && \
// python normalize_with_audio.py --language=en --cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13 --text "The total amounts to \\$4.76." \
// --audio_data /home/TestData/nlp/text_norm/audio_based/audio.wav | tail -n2 | head -n1 > /home/TestData/nlp/text_norm/audio_based/output/out_raw.txt 2>&1 && \
// cmp --silent /home/TestData/nlp/text_norm/audio_based/output/out_raw.txt /home/TestData/nlp/text_norm/audio_based/result.txt || exit 1'
// sh 'rm -rf /home/TestData/nlp/text_norm/audio_based/output/out_raw.txt'
// }
// }
// stage('L2: TN with Audio (audio and text file)') {
// steps {
// sh 'cd nemo_text_processing/text_normalization && \
// python normalize_with_audio.py --language=en --cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13 --text /home/TestData/nlp/text_norm/audio_based/text.txt \
// --audio_data /home/TestData/nlp/text_norm/audio_based/audio.wav | tail -n2 | head -n1 > /home/TestData/nlp/text_norm/audio_based/output/out_file.txt 2>&1 && \
// cmp --silent /home/TestData/nlp/text_norm/audio_based/output/out_file.txt /home/TestData/nlp/text_norm/audio_based/result.txt || exit 1'
// sh 'rm -rf /home/TestData/nlp/text_norm/audio_based/output/out_file.txt'
// }
// }
// stage('L2: TN with Audio (manifest)') {
// steps {
// sh 'cd nemo_text_processing/text_normalization && \
// python normalize_with_audio.py --language=en --audio_data /home/TestData/nlp/text_norm/audio_based/manifest.json --n_tagged=120 --cache_dir /home/TestData/nlp/text_norm/ci/grammars/9-13 && \
// cmp --silent /home/TestData/nlp/text_norm/audio_based/manifest_normalized.json /home/TestData/nlp/text_norm/audio_based/manifest_result.json || exit 1'
// sh 'rm -rf /home/TestData/nlp/text_norm/audio_based/manifest_normalized.json'
// }
// }
}
}
stage('L0: Computer Vision Integration') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage ('MNIST image classification with LeNet-5 Integration Test - on CPU') {
steps {
sh 'cd examples/cv && \
python mnist_lenet5_image_classification_pure_lightning.py trainer.gpus=0 \
trainer.accelerator=null \
trainer.fast_dev_run=true model.dataset.data_folder=/home/TestData \
&& rm -rf outputs'
}
}
}
}
// We have no integration tests, please enable this when one is added
// stage('L0: Integration Tests GPU') {
// steps {
// sh 'pytest -s -m "integration and not skipduringci and not pleasefixme"'
// }
// }
// stage('L0: Integration Tests CPU') {
// when {
// anyOf{
// branch 'main'
// changeRequest target: 'main'
// }
// }
// steps {
// sh 'pytest -s -m "integration and not pleasefixme" --cpu'
// }
// }
// We have no system tests, please enable this when one is added
// stage('L1: System Tests GPU') {
// steps {
// sh 'pytest -m "system and not skipduringci and not pleasefixme"'
// }
// }
// stage('L1: System Tests CPU') {
// when {
// anyOf{
// branch 'dev
// changeRequest target: 'main'
// }
// }
// steps {
// sh 'pytest -m "system and not pleasefixme" --cpu'
// }
// }
stage('L2: ASR dev run') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('Speech to Text') {
steps {
sh 'python examples/asr/speech_to_text.py \
model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
trainer.gpus=[0] \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_results'
sh 'rm -rf examples/asr/speech_to_text_results'
}
}
stage('Speech to Label') {
steps {
sh 'python examples/asr/speech_to_label.py \
model.train_ds.manifest_filepath=/home/TestData/speech_commands/train_manifest.json \
model.validation_ds.manifest_filepath=/home/TestData/speech_commands/test_manifest.json \
model.test_ds.manifest_filepath=/home/TestData/speech_commands/test_manifest.json \
trainer.gpus=[1] \
+trainer.fast_dev_run=True \
model.preprocessor._target_=nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor \
~model.preprocessor.window_size \
~model.preprocessor.window_stride \
~model.preprocessor.window \
~model.preprocessor.n_mels \
~model.preprocessor.n_mfcc \
~model.preprocessor.n_fft \
exp_manager.exp_dir=examples/asr/speech_to_label_results'
sh 'rm -rf examples/asr/speech_to_label_results'
}
}
stage('Speaker Recognition') {
steps {
sh 'python examples/speaker_tasks/recognition/speaker_reco.py \
model.train_ds.batch_size=10 \
model.validation_ds.batch_size=2 \
model.train_ds.manifest_filepath=/home/TestData/an4_speaker/train.json \
model.validation_ds.manifest_filepath=/home/TestData/an4_speaker/dev.json \
model.test_ds.manifest_filepath=/home/TestData/an4_speaker/test.json \
trainer.gpus=[1] \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/speaker_tasks/recognition/speaker_recognition_results'
sh 'rm -rf examples/speaker_tasks/recognition/speaker_recognition_results'
}
}
stage('Speaker Diarization Inference') {
steps {
sh 'python examples/speaker_tasks/diarization/offline_diarization.py \
diarizer.manifest_filepath=/home/TestData/an4_diarizer/an4_manifest.json \
diarizer.speaker_embeddings.model_path=/home/TestData/an4_diarizer/spkr.nemo \
diarizer.vad.model_path=/home/TestData/an4_diarizer/MatchboxNet_VAD_3x2.nemo \
diarizer.out_dir=examples/speaker_tasks/diarization/speaker_diarization_results'
sh 'rm -rf examples/speaker_tasks/diarization/speaker_diarization_results'
}
}
stage('Speaker Diarization with ASR Inference') {
steps {
sh 'python examples/speaker_tasks/diarization/offline_diarization_with_asr.py \
diarizer.manifest_filepath=/home/TestData/an4_diarizer/an4_manifest.json \
diarizer.speaker_embeddings.model_path=/home/TestData/an4_diarizer/spkr.nemo \
diarizer.asr.model_path=QuartzNet15x5Base-En \
diarizer.asr.parameters.asr_based_vad=True \
diarizer.out_dir=examples/speaker_tasks/diarization/speaker_diarization_asr_results'
sh 'rm -rf examples/speaker_tasks/diarization/speaker_diarization_asr_results'
}
}
stage('L2: Speech to Text WPE - CitriNet') {
steps {
sh 'python examples/asr/speech_to_text_bpe.py \
--config-path="conf/citrinet/" --config-name="config_bpe" \
model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
model.tokenizer.dir="/home/TestData/asr_tokenizers/an4_wpe_128/" \
model.tokenizer.type="wpe" \
trainer.gpus=[1] \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_wpe_results'
sh 'rm -rf examples/asr/speech_to_text_wpe_results'
}
}
stage('L2: Speech Pre-training - CitriNet') {
steps {
sh 'python examples/asr/speech_pre_training.py \
--config-path="conf/citrinet_ssl/" --config-name="citrinet_ssl_ci" \
model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
trainer.gpus=[1] \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_pre_training_results'
sh 'rm -rf examples/asr/speech_pre_training_results'
}
}
stage('L2: Speech to Text WPE - Conformer') {
steps {
sh 'python examples/asr/speech_to_text_bpe.py \
--config-path="conf/conformer" --config-name="conformer_ctc_bpe" \
model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
model.tokenizer.dir="/home/TestData/asr_tokenizers/an4_wpe_128/" \
model.tokenizer.type="wpe" \
model.train_ds.batch_size=4 \
model.validation_ds.batch_size=4 \
trainer.gpus=[1] \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_wpe_conformer_results'
sh 'rm -rf examples/asr/speech_to_text_wpe_conformer_results'
}
}
}
}
// TODO: Enable test after 21.08 container is used.
// stage('L2: ASR DALI dev run') {
// when {
// anyOf {
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// parallel {
// stage('Speech to Text - DALI AudioToMelSpectrogramPreprocessor') {
// steps {
// sh 'python examples/asr/speech_to_text.py \
// model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
// +model.train_ds.use_dali=True \
// model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
// +model.validation_ds.use_dali=True \
// trainer.gpus=[0] \
// +trainer.fast_dev_run=True \
// exp_manager.exp_dir=examples/asr/speech_to_text_results'
// sh 'rm -rf examples/asr/speech_to_text_results'
// }
// }
// stage('Speech to Text BPE - DALI AudioToMelSpectrogramPreprocessor') {
// steps {
// sh 'python examples/asr/speech_to_text_bpe.py \
// --config-path="conf/citrinet/" --config-name="config_bpe" \
// model.tokenizer.dir="/home/TestData/asr_tokenizers/an4_wpe_128/" \
// model.tokenizer.type="wpe" \
// model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
// +model.train_ds.use_dali=True \
// model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
// +model.validation_ds.use_dali=True \
// trainer.gpus=[0] \
// +trainer.fast_dev_run=True \
// exp_manager.exp_dir=examples/asr/speech_to_text_wpe_results'
// sh 'rm -rf examples/asr/speech_to_text_wpe_results'
// }
// }
// // TODO: This would fail due to an unnecessary torchaudio import.
// // To be enabled once torchaudio is available in the container used for CI
// // stage('Speech to Text - DALI AudioToMFCCPreprocessor') {
// // steps {
// // sh 'python examples/asr/speech_to_text.py \
// // model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
// // +model.train_ds.use_dali=True \
// // model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
// // +model.validation_ds.use_dali=True \
// // model.preprocessor._target_=nemo.collections.asr.modules.AudioToMFCCPreprocessor \
// // ~model.preprocessor.normalize \
// // ~model.preprocessor.features \
// // ~model.preprocessor.frame_splicing \
// // ~model.preprocessor.dither \
// // ~model.preprocessor.stft_conv \
// // +model.n_mels=64 \
// // +model.n_mfcc=64 \
// // trainer.gpus=[0] \
// // +trainer.fast_dev_run=True \
// // exp_manager.exp_dir=examples/asr/speech_to_text_results'
// // sh 'rm -rf examples/asr/speech_to_text_results'
// // }
// // }
// }
// }
// TODO: Add back once CI is updated
// stage('L2: ASR RNNT dev run') {
// when {
// anyOf {
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// parallel {
// stage('Speech to Text - RNNT') {
// steps {
// sh 'STRICT_NUMBA_COMPAT_CHECK=false python examples/asr/speech_to_text_rnnt.py \
// --config-path="conf/contextnet_rnnt/" --config-name="config_rnnt.yaml" \
// model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
// model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
// model.train_ds.batch_size=2 \
// model.validation_ds.batch_size=2 \
// trainer.gpus=[0] \
// +trainer.fast_dev_run=True \
// exp_manager.exp_dir=examples/asr/speech_to_text_rnnt_results'
// sh 'rm -rf examples/asr/speech_to_text_rnnt_results'
// }
// }
// stage('L2: Speech to Text RNNT WPE') {
// steps {
// sh 'STRICT_NUMBA_COMPAT_CHECK=false python examples/asr/speech_to_text_rnnt_bpe.py \
// --config-path="conf/contextnet_rnnt/" --config-name="config_rnnt_bpe.yaml" \
// model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
// model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
// model.train_ds.batch_size=2 \
// model.validation_ds.batch_size=2 \
// model.tokenizer.dir="/home/TestData/asr_tokenizers/an4_wpe_128/" \
// model.tokenizer.type="wpe" \
// trainer.gpus=[0] \
// +trainer.fast_dev_run=True \
// exp_manager.exp_dir=examples/asr/speech_to_text_rnnt_wpe_results'
// sh 'rm -rf examples/asr/speech_to_text_rnnt_wpe_results'
// }
// }
// }
// }
stage('L2: ASR Multi-dataloader dev run') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('Speech to Text multi-dataloader') {
steps {
sh 'python examples/asr/speech_to_text.py \
model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
model.validation_ds.manifest_filepath=[/home/TestData/an4_dataset/an4_val.json,/home/TestData/an4_dataset/an4_val.json] \
trainer.gpus=[0] \
trainer.max_epochs=1 \
+trainer.max_steps=1 \
+trainer.num_sanity_val_steps=1 \
exp_manager.exp_dir=examples/asr/speech_to_text_results'
sh 'rm -rf examples/asr/speech_to_text_results'
}
}
stage('Speech to Label multi-dataloader') {
steps {
sh 'python examples/asr/speech_to_label.py \
model.train_ds.manifest_filepath=/home/TestData/speech_commands/train_manifest.json \
model.validation_ds.manifest_filepath=[/home/TestData/speech_commands/test_manifest.json,/home/TestData/speech_commands/test_manifest.json] \
trainer.gpus=[1] \
trainer.max_epochs=1 \
+trainer.max_steps=1 \
+trainer.num_sanity_val_steps=1 \
model.preprocessor._target_=nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor \
~model.preprocessor.window_size \
~model.preprocessor.window_stride \
~model.preprocessor.window \
~model.preprocessor.n_mels \
~model.preprocessor.n_mfcc \
~model.preprocessor.n_fft \
exp_manager.exp_dir=examples/asr/speech_to_label_results'
sh 'rm -rf examples/asr/speech_to_label_results'
}
}
}
}
stage('L2: Speech Transcription') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('Speech to Text Transcribe') {
steps {
sh 'python examples/asr/transcribe_speech.py \
pretrained_name="QuartzNet15x5Base-En" \
audio_dir="/home/TestData/an4_transcribe/test_subset/" \
output_filename="stt_test_res.json" \
cuda=true \
amp=true'
sh 'rm -rf stt_test_res.json'
}
}
}
}
stage('L2: Segmentation Tool') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
stages {
stage('Install ctc_segmentation requirements') {
steps {
sh 'cd tools/ctc_segmentation && \
pip install -r requirements.txt && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg'
}
}
stage('Parallel ctc_segmentation test') {
failFast true
parallel {
stage('L2: Eng QN with .wav') {
steps {
sh 'cd tools/ctc_segmentation && \
TIME=`date +"%Y-%m-%d-%T"` && \
/bin/bash run_sample.sh \
--MODEL_NAME_OR_PATH=QuartzNet15x5Base-En \
--DATA_DIR=/home/TestData/ctc_segmentation/eng \
--OUTPUT_DIR=/home/TestData/ctc_segmentation/eng/output${TIME} \
--LANGUAGE=eng \
--OFFSET=0 \
--CUT_PREFIX=0 \
--MIN_SEGMENT_LEN=0 \
--AUDIO_FORMAT=.wav && \
python /home/TestData/ctc_segmentation/verify_alignment.py \
-r /home/TestData/ctc_segmentation/eng/eng_valid_segments.txt \
-g /home/TestData/ctc_segmentation/eng/output${TIME}/verified_segments/nv_test_segments.txt && \
rm -rf /home/TestData/ctc_segmentation/eng/output${TIME}'
}
}
stage('L2: Ru QN with .mp3') {
steps {
sh 'cd tools/ctc_segmentation && \
TIME=`date +"%Y-%m-%d-%T"` && \
/bin/bash run_sample.sh \
--MODEL_NAME_OR_PATH=/home/TestData/ctc_segmentation/QuartzNet15x5-Ru-e512-wer14.45.nemo \
--DATA_DIR=/home/TestData/ctc_segmentation/ru \
--OUTPUT_DIR=/home/TestData/ctc_segmentation/ru/output${TIME} \
--LANGUAGE=ru \
--OFFSET=0 \
--CUT_PREFIX=0 \
--MIN_SEGMENT_LEN=0 \
--AUDIO_FORMAT=.mp3 \
--ADDITIONAL_SPLIT_SYMBOLS=";" && \
python /home/TestData/ctc_segmentation/verify_alignment.py \
-r /home/TestData/ctc_segmentation/ru/valid_ru_segments.txt \
-g /home/TestData/ctc_segmentation/ru/output${TIME}/verified_segments/ru_segments.txt && \
rm -rf /home/TestData/ctc_segmentation/ru/output${TIME}'
}
}
}
}
}
}
// TODO: add test once megatron-bert is supported again
// stage('L2: Multi-GPU Megatron finetuning') {
// when {
// anyOf {
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// parallel {
// stage('L2: Cased Megatron finetuning on MRPC') {
// steps {
// sh 'cd examples/nlp/glue_benchmark && \
// python glue_benchmark.py \
// model.dataset.data_dir=/home/TestData/nlp/glue_fake/MRPC \
// trainer.gpus=[0,1] \
// +trainer.fast_dev_run=true \
// model.dataset.use_cache=false \
// model.language_model.pretrained_model_name=megatron-bert-345m-cased \
// trainer.accelerator=ddp \
// exp_manager=null'
// }
// }
// }
// }
stage('L2: SGD-QA') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('L2: SGD-QA') {
steps {
sh 'cd examples/nlp/dialogue_state_tracking && \
python sgd_qa.py \
model.dataset.data_dir=/home/TestData/nlp/sgd_small \
model.dataset.dialogues_example_dir=sgd_outputs \
model.dataset.task_name=debug_sample \
trainer.max_steps=1 \
trainer.max_epochs=1 \
model.train_ds.batch_size=2 \
model.validation_ds.batch_size=2 \
model.test_ds.batch_size=2 \
model.nemo_path=null \
trainer.val_check_interval=0.0 \
trainer.gpus=[0,1] \
model.dataset.use_cache=false \
model.language_model.pretrained_model_name=bert-base-cased \
trainer.accelerator=ddp \
exp_manager=null && \
rm -rf sgd_outputs'
}
}
stage('GLUE STS-b with AlBERT') {
steps {
sh 'python examples/nlp/glue_benchmark/glue_benchmark.py \
model.dataset.use_cache=false \
model.task_name=sts-b \
model.dataset.data_dir=/home/TestData/nlp/glue_fake/STS-B \
trainer.gpus=[1] \
+trainer.fast_dev_run=True \
model.language_model.pretrained_model_name=albert-base-v1 \
exp_manager=null'
}
}
stage('Test Restore with AlBERT') {
steps {
sh 'python examples/nlp/token_classification/punctuation_capitalization_evaluate.py \
pretrained_model=/home/TestData/nlp/pretrained_models/Punctuation_and_Capitalization_albert.nemo \
model.dataset.use_cache=false \
model.dataset.data_dir=/home/TestData/nlp/token_classification_punctuation/ \
trainer.gpus=[1] \
exp_manager=null'
}
}
stage('Test Restore with RoBERTa') {
steps {
sh 'python examples/nlp/token_classification/punctuation_capitalization_evaluate.py \
pretrained_model=/home/TestData/nlp/pretrained_models/Punctuation_and_Capitalization_roberta.nemo \
model.dataset.use_cache=false \
model.dataset.data_dir=/home/TestData/nlp/token_classification_punctuation/ \
trainer.gpus=[1] \
exp_manager=null'
}
}
}
}
stage('L2: Parallel BERT SQUAD v1.1 / v2.0') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('BERT SQUAD 1.1') {
// Cannot do fast_dev_run because squad needs whole dev dataset
steps {
sh 'cd examples/nlp/question_answering && \
python question_answering_squad.py \
model.train_ds.file=/home/TestData/nlp/squad_mini/v1.1/train-v1.1.json \
model.dataset.use_cache=false \
model.validation_ds.file=/home/TestData/nlp/squad_mini/v1.1/dev-v1.1.json \
model.test_ds.file=/home/TestData/nlp/squad_mini/v1.1/dev-v1.1.json \
model.train_ds.batch_size=2 \
model.train_ds.num_samples=2 \
model.validation_ds.batch_size=2 \
model.validation_ds.num_samples=2 \
model.test_ds.num_samples=2 \
model.test_ds.batch_size=2 \
trainer.max_epochs=1 \
+trainer.max_steps=1 \
model.language_model.pretrained_model_name=bert-base-uncased \
model.dataset.version_2_with_negative=false \
trainer.precision=16 \
trainer.gpus=[0] \
exp_manager=null'
}
}
stage('BERT SQUAD 2.0') {
// Cannot do fast_dev_run because squad needs whole dev dataset
steps {
sh 'cd examples/nlp/question_answering && \
python question_answering_squad.py \
model.train_ds.file=/home/TestData/nlp/squad_mini/v2.0/train-v2.0.json \
model.dataset.use_cache=false \
model.train_ds.batch_size=2 \
model.train_ds.num_samples=2 \
model.validation_ds.batch_size=2 \
model.validation_ds.num_samples=2 \
trainer.max_epochs=1 \
+trainer.max_steps=1 \
model.validation_ds.file=/home/TestData/nlp/squad_mini/v2.0/dev-v2.0.json \
model.language_model.pretrained_model_name=bert-base-uncased \
model.dataset.version_2_with_negative=true \
trainer.precision=16 \
trainer.gpus=[1] \
exp_manager=null'
}
}
stage('Duplex Text Normalization with Tarred dataset') {
steps {
sh 'cd examples/nlp/duplex_text_normalization && \
python duplex_text_normalization_train.py \
data.validation_ds.data_path=/home/TestData/nlp/duplex_text_norm/small_test.tsv \
mode=tn \
lang=en \
tagger_model.do_training=false \
decoder_model.transformer=t5-small \
data.validation_ds.batch_size=2 \
data.train_ds.use_cache=false \
data.validation_ds.use_cache=false \
data.test_ds.batch_size=2 \
data.train_ds.decoder_data_augmentation=false \
data.train_ds.num_workers=2 \
decoder_trainer.gpus=[0,1] \
data.train_ds.use_tarred_dataset=true \
+decoder_trainer.fast_dev_run=true \
decoder_exp_manager.create_checkpoint_callback=false \
data.train_ds.tar_metadata_file=/home/TestData/nlp/duplex_text_norm/tarred_small/metadata.json \
data.test_ds.use_cache=false \
data.test_ds.data_path=/home/TestData/nlp/duplex_text_norm/small_test.tsv'
}
}
}
}
// Runs out of memory on the 12G TITAN V (GPU 0 on main CI)
// TODO: add when megatron bert is supported again in NeMo
// stage('L2: MegaBERT Token Classification') {
// when {
// anyOf {
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// steps {
// sh 'cd examples/nlp/token_classification && \
// python token_classification_train.py \
// model.dataset.data_dir=/home/TestData/nlp/token_classification_punctuation/ \
// model.language_model.pretrained_model_name=megatron-bert-345m-uncased \
// model.train_ds.batch_size=10 \
// model.dataset.max_seq_length=50 \
// model.dataset.use_cache=false \
// trainer.accelerator=ddp \
// trainer.precision=16 \
// trainer.gpus=[1] \
// +trainer.fast_dev_run=true \
// exp_manager=null'
// }
// }
stage('L2: Parallel SQUAD v1.1 & v2.0') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
// TODO: use megatron bert when supported again
stage('SQUAD v2.0 with DistilBERT Uncased') {
// stage('SQUAD v2.0 with Megatron with ckpt & config') {
// Cannot do fast_dev_run because squad needs whole dev dataset
// model.language_model.pretrained_model_name=megatron-bert-uncased \
// model.language_model.lm_checkpoint=/home/TestData/nlp/megatron_345m_uncased/model_optim_rng.pt \
// model.language_model.config_file=/home/TestData/nlp/megatron_345m_uncased/345m_config.json \
steps {
sh 'cd examples/nlp/question_answering && \
python question_answering_squad.py \
model.train_ds.file=/home/TestData/nlp/squad_mini/v2.0/train-v2.0.json \
model.dataset.use_cache=false \
model.train_ds.batch_size=1 \
model.train_ds.num_samples=1 \
model.validation_ds.batch_size=1 \
model.validation_ds.num_samples=1 \
trainer.accelerator=ddp \
trainer.max_epochs=1 \
+trainer.max_steps=1 \
model.validation_ds.file=/home/TestData/nlp/squad_mini/v2.0/dev-v2.0.json \
model.language_model.pretrained_model_name=distilbert-base-uncased \
model.dataset.version_2_with_negative=true \
trainer.precision=16 \
trainer.gpus=[1] \
exp_manager=null'
}
}
stage('RoBERTa SQUAD 1.1') {
// Cannot do fast_dev_run because squad needs whole dev dataset
steps {
sh 'cd examples/nlp/question_answering && \
python question_answering_squad.py \
model.train_ds.file=/home/TestData/nlp/squad_mini/v1.1/train-v1.1.json \
model.dataset.use_cache=false \
model.train_ds.batch_size=2 \
model.train_ds.num_samples=2 \
model.validation_ds.batch_size=2 \
model.validation_ds.num_samples=2 \
trainer.max_epochs=1 \
+trainer.max_steps=1 \
model.validation_ds.file=/home/TestData/nlp/squad_mini/v1.1/dev-v1.1.json \
model.language_model.pretrained_model_name=roberta-base \
model.dataset.version_2_with_negative=false \
trainer.precision=16 \
trainer.gpus=[0] \
exp_manager=null'
}
}
stage ('Text Classification with BERT Test') {
steps {
sh 'cd examples/nlp/text_classification && \
python text_classification_with_bert.py \
model.dataset.num_classes=6 \
model.train_ds.file_path=/home/TestData/nlp/retail_text_classification/train.tsv \
model.validation_ds.file_path=/home/TestData/nlp/retail_text_classification/dev.tsv \
model.language_model.pretrained_model_name=distilbert-base-uncased \
model.train_ds.batch_size=10 \
model.dataset.max_seq_length=50 \
model.dataset.use_cache=false \
trainer.gpus=[0] \
+trainer.fast_dev_run=true \
exp_manager=null'
}
}
stage('L2: Intent and Slot Classification') {
steps {
sh 'cd examples/nlp/intent_slot_classification && \
python intent_slot_classification.py \
model.data_dir=/home/TestData/nlp/retail \
model.validation_ds.prefix=dev \
model.test_ds.prefix=dev \
trainer.gpus=[0] \
+trainer.fast_dev_run=true \
exp_manager.exp_dir=checkpoints'
sh 'rm -rf checkpoints'
}
}
}
}
// TODO: add when megatron-bert is supported again
// stage('L2: Model Parallel Size 2 Megatron Text Classification') {
// when {
// anyOf{
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// steps{
// sh 'cd examples/nlp/text_classification && \
// python text_classification_with_bert.py \
// trainer.gpus=[0,1] \
// trainer.num_nodes=1 \
// trainer.precision=16 \
// trainer.gradient_clip_val=1.0 \
// +trainer.fast_dev_run=true \
// model.dataset.num_classes=6 \
// model.train_ds.file_path=/home/TestData/nlp/retail_text_classification/train.tsv \
// model.train_ds.batch_size=4 \
// model.language_model.pretrained_model_name=megatron-bert-uncased \
// model.language_model.config_file=/home/TestData/nlp/mp_2_bert_toy/config.json \
// model.language_model.lm_checkpoint=/home/TestData/nlp/mp_2_bert_toy/iter_2000000 \
// model.nemo_path=null \
// ~model.infer_samples \
// exp_manager=null'
// }
// }
// stage('L2: Model Parallel Size 2 Megatron Autoresume') {
// when {
// anyOf{
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// steps{
// sh 'cd examples/nlp/text_classification && \
// python text_classification_with_bert.py \
// trainer.gpus=[0,1] \
// trainer.num_nodes=1 \
// trainer.precision=16 \
// trainer.gradient_clip_val=1.0 \
// trainer.max_epochs=1 \
// +trainer.fast_dev_run=true \
// model.dataset.num_classes=6 \
// model.train_ds.file_path=/home/TestData/nlp/retail_text_classification/train.tsv \
// model.train_ds.batch_size=4 \
// model.language_model.pretrained_model_name=megatron-bert-uncased \
// model.language_model.config_file=/home/TestData/nlp/mp_2_bert_toy/config.json \
// model.language_model.lm_checkpoint=/home/TestData/nlp/mp_2_bert_toy/iter_2000000 \
// model.nemo_path=null \
// ~model.infer_samples \
// +exp_manager.explicit_log_dir=/home/TestData/nlp/mp_autoresume \
// +exp_manager.resume_if_exists=true'
// }
// }
// stage('L2: Model Parallel Size 2 Megatron Evaluation from .nemo') {
// when {
// anyOf{
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// steps{
// sh 'cd examples/nlp/text_classification && \
// python model_parallel_text_classification_evaluation.py \
// trainer.gpus=[0,1] \
// trainer.num_nodes=1 \
// model.dataset.num_classes=6 \
// model.test_ds.file_path=/home/TestData/nlp/retail_text_classification/dev.tsv \
// model.nemo_path=/home/TestData/nlp/mp_2_nemo/retail_text_class_350M.nemo \
// exp_manager=null'
// }
// }
// stage('L2: Model Parallel Size 2 Megatron Train from .nemo') {
// when {
// anyOf{
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// steps{
// sh 'cd examples/nlp/token_classification && \
// python token_classification_train.py \
// pretrained_model=/home/TestData/nlp/mp_2_nemo/ner_350M.nemo \
// model.dataset.data_dir=/home/TestData/nlp/ner/ \
// model.train_ds.batch_size=2 \
// model.dataset.use_cache=false \
// trainer.gpus=[0,1] \
// +trainer.fast_dev_run=true \
// model.dataset.class_balancing="weighted_loss" \
// exp_manager=null'
// }
// }
stage('L2: Parallel NLP Examples 2') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage ('NER finetuning from pretrained Test') {
steps {
sh 'cd examples/nlp/token_classification && \
python token_classification_train.py \
pretrained_model=ner_en_bert \
model.dataset.data_dir=/home/TestData/nlp/ner/ \
model.train_ds.batch_size=2 \
model.dataset.use_cache=false \
trainer.gpus=[0] \
+trainer.fast_dev_run=true \
model.dataset.class_balancing="weighted_loss" \
exp_manager.exp_dir=null'
}
}
stage ('Punctuation and capitalization finetuning from pretrained test') {
steps {
sh 'cd examples/nlp/token_classification && \
python punctuation_capitalization_train.py \
pretrained_model=punctuation_en_bert \
model.dataset.data_dir=/home/TestData/nlp/token_classification_punctuation/ \
trainer.gpus=[1] \
+trainer.fast_dev_run=true \
model.dataset.use_cache=false \
exp_manager.exp_dir=null'
}
}
stage ('NER with TurkuNLP/bert-base-finnish-cased-v1') {
steps {
sh 'cd examples/nlp/token_classification && \
python token_classification_train.py \
model.dataset.data_dir=/home/TestData/nlp/token_classification_punctuation/ \
trainer.gpus=[0] \
+trainer.fast_dev_run=true \
model.dataset.use_cache=false \
model.language_model.pretrained_model_name="TurkuNLP/bert-base-finnish-cased-v1" \
exp_manager.exp_dir=null'
}
}
stage('Evaluation script for Token Classification') {
steps {
sh 'python examples/nlp/token_classification/token_classification_evaluate.py \
model.dataset.data_dir=/home/TestData/nlp/ner/ \
pretrained_model=/home/TestData/nlp/pretrained_models/NER_Model_with_BERT_base_uncased.nemo && \
rm -rf nemo_experiments'
}
}
stage('Evaluation script for Punctuation') {
steps {
sh 'python examples/nlp/token_classification/punctuation_capitalization_evaluate.py \
model.dataset.data_dir=/home/TestData/nlp/token_classification_punctuation/ \
pretrained_model=/home/TestData/nlp/pretrained_models/Punctuation_Capitalization_with_DistilBERT_base_uncased.nemo && \
rm -rf nemo_experiments'
}
}
stage('L2: Punctuation & Capitalization, 2GPUs with DistilBERT') {
steps {
sh 'cd examples/nlp/token_classification && \
python punctuation_capitalization_train.py \
model.dataset.data_dir=/home/TestData/nlp/token_classification_punctuation/ \
model.language_model.pretrained_model_name=distilbert-base-uncased \
model.dataset.use_cache=false \
trainer.gpus=[0,1] \
trainer.accelerator=ddp \
trainer.max_epochs=1 \
+exp_manager.explicit_log_dir=/home/TestData/nlp/token_classification_punctuation/output && \
python punctuation_capitalization_evaluate.py \
pretrained_model=/home/TestData/nlp/token_classification_punctuation/output/checkpoints/Punctuation_and_Capitalization.nemo && \
rm -rf /home/TestData/nlp/token_classification_punctuation/output/*'
}
}
}
}
stage('L2: Parallel Pretraining BERT pretraining from Text/Preprocessed') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('L2: Pretraining BERT pretraining from Text') {
steps {
sh 'cd examples/nlp/language_modeling && \
python bert_pretraining.py \
--config-name=bert_pretraining_from_text_config.yaml \
trainer.gpus=[0] \
trainer.precision=16 \
+trainer.fast_dev_run=true \
model.train_ds.data_file=/home/TestData/nlp/wikitext-2/train.txt \
model.train_ds.batch_size=32 \
model.validation_ds.data_file=/home/TestData/nlp/wikitext-2/valid.txt \
model.validation_ds.batch_size=32 \
model.language_model.config_file=/home/TestData/nlp/bert_configs/bert_3200.json \
model.optim.lr=0.01 \
model.optim.sched.warmup_ratio=0.1 \
model.tokenizer.tokenizer_name=sentencepiece \
model.tokenizer.tokenizer_model=/home/TestData/nlp/wikitext-2/tokenizer_bpe_v3193/tokenizer.model \
model.mask_prob=0.15 \
model.short_seq_prob=0.1 \
exp_manager.exp_dir=PretrainingBERTFromText \
'
sh 'rm -f /home/TestData/nlp/wikitext-2/*.pkl'
sh 'rm -rf examples/nlp/language_modeling/PretrainingBERTFromText'
sh 'ls -lha examples/nlp/language_modeling'
}
}
stage('L2: Pretraining BERT from Preprocessed') {
steps {
sh 'cd examples/nlp/language_modeling && \
python bert_pretraining.py \
--config-name=bert_pretraining_from_preprocessed_config.yaml \
trainer.gpus=[1] \
trainer.precision=16 \
+trainer.fast_dev_run=true \
model.train_ds.data_file=/home/TestData/nlp/wiki_book_mini/training \
model.train_ds.batch_size=8 \
model.language_model.lm_checkpoint=/home/TestData/nlp/bert_ckpts/nemo1.0/bert_base_uncased_mlm_final_1074591_nemo1.0.pt \
model.language_model.config_file=/home/TestData/nlp/bert_configs/uncased_L-12_H-768_A-12.json \
model.optim.lr=0.875e-4 \
model.optim.weight_decay=0.01 \
model.optim.sched.warmup_ratio=0.01 \
exp_manager.exp_dir=PretrainingBERTFromPreprocessed \
exp_manager.create_checkpoint_callback=False \
'
sh 'rm -rf examples/nlp/language_modeling/PretrainingBERTFromPreprocessed'
sh 'ls -lha examples/nlp/language_modeling'
}
}
}
}
stage('L2: Entity Linking') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage ('Self Alignment Pretraining BERT') {
steps {
sh 'cd examples/nlp/entity_linking && \
python self_alignment_pretraining.py \
project_dir=. \
trainer.val_check_interval=3 \
model.raw_data=None \
model.train_ds.data_file=/home/TestData/nlp/entity_linking/tiny_example_train_pairs.tsv \
model.validation_ds.data_file=/home/TestData/nlp/entity_linking/tiny_example_validation_pairs.tsv \
model.train_ds.batch_size=8 \
model.validation_ds.batch_size=8 \
exp_manager.exp_dir=null'
}
}
}
}
stage('L2: NMT Attention is All You Need Training') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('L2: NMT Training Post-LN') {
steps {
sh 'cd examples/nlp/machine_translation && \
python enc_dec_nmt.py \
--config-path=conf \
--config-name=aayn_base \
do_testing=true \
model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.encoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
trainer.gpus=[0] \
+trainer.fast_dev_run=true \
+trainer.limit_test_batches=2 \
exp_manager=null \
'
}
}
stage('L2: NMT Training Pre-LN') {
steps {
sh 'cd examples/nlp/machine_translation && \
python enc_dec_nmt.py \
--config-path=conf \
--config-name=aayn_base \
do_testing=true \
model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.encoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
model.encoder.pre_ln=true \
model.decoder.pre_ln=true \
trainer.gpus=[1] \
+trainer.fast_dev_run=true \
+trainer.limit_test_batches=2 \
exp_manager=null \
'
}
}
stage('L2: NMT Multi-Validation') {
steps {
sh 'cd examples/nlp/machine_translation && \
python enc_dec_nmt.py \
--config-path=conf \
--config-name=aayn_base \
do_testing=true \
model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-en-de.src \
model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-en-de.ref \
model.validation_ds.src_file_name=[/home/TestData/nlp/nmt/toy_data/wmt13-en-de.src,/home/TestData/nlp/nmt/toy_data/wmt14-en-de.src] \
model.validation_ds.tgt_file_name=[/home/TestData/nlp/nmt/toy_data/wmt13-en-de.ref,/home/TestData/nlp/nmt/toy_data/wmt14-en-de.ref] \
model.test_ds.src_file_name=[/home/TestData/nlp/nmt/toy_data/wmt13-en-de.src,/home/TestData/nlp/nmt/toy_data/wmt14-en-de.src] \
model.test_ds.tgt_file_name=[/home/TestData/nlp/nmt/toy_data/wmt13-en-de.ref,/home/TestData/nlp/nmt/toy_data/wmt14-en-de.ref] \
model.encoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
trainer.gpus=[0] \
+trainer.fast_dev_run=true \
+trainer.limit_test_batches=2 \
exp_manager=null \
'
}
}
}
}
stage('L2: NMT Attention is All You Need Inference') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('L2: NMT Inference - PostLN') {
steps {
sh 'cd examples/nlp/machine_translation && \
python nmt_transformer_infer.py \
--model=/home/TestData/nlp/nmt/toy_data/TransformerLargeDe-En.nemo \
--srctext=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.test.src \
--tgtout=/home/TestData/nlp/nmt/toy_data/out.txt \
--target_lang en \
--source_lang de \
'
}
}
stage('L2: NMT Inference - Pre-LN') {
steps {
sh 'cd examples/nlp/machine_translation && \
python nmt_transformer_infer.py \
--model=/home/TestData/nlp/nmt/toy_data/en_de_24x6_preln.nemo \
--srctext=/home/TestData/nlp/nmt/toy_data/wmt14-en-de.test.src \
--tgtout=/home/TestData/nlp/nmt/toy_data/out.txt \
--target_lang de \
--source_lang en \
'
}
}
}
}
stage('L2: NMT with HuggingFace') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('L2: NMT Pretrained HF Encoder') {
steps {
sh 'cd examples/nlp/machine_translation && \
python enc_dec_nmt.py \
--config-path=conf \
--config-name=huggingface \
model.shared_tokenizer=False \
model.encoder_tokenizer.library=huggingface \
model.encoder.library=huggingface \
model.encoder.model_name=distilbert-base-cased \
model.encoder.pretrained=true \
model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.train_ds.tokens_in_batch=128 \
model.validation_ds.tokens_in_batch=128 \
model.test_ds.tokens_in_batch=128 \
model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
model.decoder.hidden_size=768 \
model.decoder.inner_size=256 \
trainer.gpus=[0] \
+trainer.fast_dev_run=true \
exp_manager=null \
'
}
}
stage('L2: NMT Custom HF Encoder') {
steps {
sh 'cd examples/nlp/machine_translation && \
python enc_dec_nmt.py \
--config-path=conf \
--config-name=huggingface \
model.shared_tokenizer=True \
model.encoder_tokenizer.library=yttm \
model.encoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
model.encoder.library=huggingface \
model.encoder.model_name=null \
model.encoder.pretrained=false \
+model.encoder._target_=transformers.BertConfig \
+model.encoder.hidden_size=48 \
model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.train_ds.tokens_in_batch=128 \
model.validation_ds.tokens_in_batch=128 \
model.test_ds.tokens_in_batch=128 \
model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
model.decoder.hidden_size=48 \
model.decoder.inner_size=256 \
trainer.gpus=[1] \
+trainer.fast_dev_run=true \
exp_manager=null \
'
}
}
}
}
// TODO: add when megatron bert is supported again in NeMo
// stage('L2: NMT Megatron BERT Model Parallel Size 2 Encoder') {
// when {
// anyOf{
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// steps{
// sh 'cd examples/nlp/machine_translation && \
// python enc_dec_nmt.py \
// --config-path=conf \
// --config-name=megatron \
// model.encoder.model_name=megatron-bert-uncased \
// model.encoder.checkpoint_file=/home/TestData/nlp/mp_2_bert_toy/iter_2000000 \
// model.encoder.hidden_size=1024 \
// model.encoder.num_attention_heads=16 \
// model.encoder.num_layers=24 \
// model.encoder.max_position_embeddings=512 \
// model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
// model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
// model.decoder.hidden_size=1024 \
// trainer.gpus=[0,1] \
// +trainer.fast_dev_run=true \
// exp_manager=null \
// '
// }
// }
stage('L2: NMT Tarred Dataset Creation') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('L2: NMT Auto Tarred Dataset Creation') {
steps {
sh 'cd examples/nlp/machine_translation && \
python enc_dec_nmt.py \
--config-path=conf \
--config-name=aayn_base \
do_training=false \
model.preproc_out_dir=$PWD/preproc_out_dir \
model.train_ds.use_tarred_dataset=true \
model.train_ds.n_preproc_jobs=2 \
model.train_ds.lines_per_dataset_fragment=500 \
model.train_ds.num_batches_per_tarfile=10 \
model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
model.encoder_tokenizer.vocab_size=2000 \
model.decoder_tokenizer.vocab_size=2000 \
~model.test_ds \
trainer.gpus=[0] \
+trainer.fast_dev_run=true \
exp_manager=null \
'
}
}
stage('L2: NMT Script Tarred Dataset Creation') {
steps {
sh 'cd examples/nlp/machine_translation && \
python create_tarred_parallel_dataset.py \
--src_fname /home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
--tgt_fname /home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
--out_dir $PWD/out_dir \
--encoder_tokenizer_vocab_size=2000 \
--decoder_tokenizer_vocab_size=2000 \
--tokens_in_batch=1000 \
--lines_per_dataset_fragment=500 \
--num_batches_per_tarfile=10 \
--n_preproc_jobs=2 \
'
}
}
}
}
// stage('L2: NMT Bottleneck Fallback') {
// when {
// anyOf {
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// parallel {
// stage('L2: seq2seq (no bottleneck)') {
// steps {
// sh 'cd examples/nlp/machine_translation && \
// enc_dec_nmt-bottleneck.py \
// --config-path=conf \
// --config-name=aayn_bottleneck \
// do_testing=true \
// model.model_type=nll \
// model.encoder.arch=seq2seq \
// model.encoder.hidden_steps=1 \
// model.encoder.hidden_blocks=1 \
// model.encoder.hidden_init_method=params \
// model.encoder.hidden_size=64 \
// model.encoder.inner_size=128 \
// model.encoder.num_attention_heads=2 \
// model.encoder.num_layers=2 \
// model.decoder.hidden_size=64 \
// model.decoder.inner_size=128 \
// model.decoder.num_attention_heads=2 \
// model.decoder.num_layers=2 \
// model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-en-de.src \
// model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-en-de.ref \
// model.validation_ds.src_file_name=[/home/TestData/nlp/nmt/toy_data/wmt13-en-de.src,/home/TestData/nlp/nmt/toy_data/wmt14-en-de.src] \
// model.validation_ds.tgt_file_name=[/home/TestData/nlp/nmt/toy_data/wmt13-en-de.ref,/home/TestData/nlp/nmt/toy_data/wmt14-en-de.ref] \
// model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt13-en-de.src \
// model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt13-en-de.ref \
// model.encoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
// model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
// trainer.gpus=[0] \
// +trainer.fast_dev_run=true \
// +trainer.limit_test_batches=2 \
// exp_manager=null \
// '
// }
// }
// }
// }
// stage('L2: NMT Bottleneck Architecture') {
// when {
// anyOf {
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// parallel {
// stage('Bridge Encoder (identity)') {
// steps {
// sh 'cd examples/nlp/machine_translation && \
// enc_dec_nmt-bottleneck.py \
// --config-path=conf \
// --config-name=aayn_bottleneck \
// do_testing=true \
// model.model_type=nll \
// model.encoder.arch=bridge \
// model.encoder.hidden_steps=1 \
// model.encoder.hidden_blocks=1 \
// model.encoder.hidden_init_method=identity \
// model.encoder.hidden_size=64 \
// model.encoder.inner_size=128 \
// model.encoder.num_attention_heads=2 \
// model.encoder.num_layers=2 \
// model.decoder.hidden_size=64 \
// model.decoder.inner_size=128 \
// model.decoder.num_attention_heads=2 \
// model.decoder.num_layers=2 \
// model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
// model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.encoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
// model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
// trainer.gpus=[0] \
// +trainer.fast_dev_run=true \
// +trainer.limit_test_batches=2 \
// exp_manager=null \
// '
// }
// }
// stage('Perceiver Encoder (params)') {
// steps {
// sh 'cd examples/nlp/machine_translation && \
// enc_dec_nmt-bottleneck.py \
// --config-path=conf \
// --config-name=aayn_bottleneck \
// do_testing=true \
// model.model_type=nll \
// model.encoder.arch=perceiver \
// model.encoder.hidden_steps=1 \
// model.encoder.hidden_blocks=1 \
// model.encoder.hidden_init_method=params \
// model.encoder.hidden_size=64 \
// model.encoder.inner_size=128 \
// model.encoder.num_attention_heads=2 \
// model.encoder.num_layers=2 \
// model.decoder.hidden_size=64 \
// model.decoder.inner_size=128 \
// model.decoder.num_attention_heads=2 \
// model.decoder.num_layers=2 \
// model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
// model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.encoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
// model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
// trainer.gpus=[1] \
// +trainer.fast_dev_run=true \
// +trainer.limit_test_batches=2 \
// exp_manager=null \
// '
// }
// }
// }
// }
// stage('L2: NMT Bottleneck LVM') {
// when {
// anyOf {
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// parallel {
// stage('VAE') {
// steps {
// sh 'cd examples/nlp/machine_translation && \
// enc_dec_nmt-bottleneck.py \
// --config-path=conf \
// --config-name=aayn_bottleneck \
// do_testing=true \
// model.model_type=vae \
// model.encoder.arch=perceiver \
// model.encoder.hidden_steps=1 \
// model.encoder.hidden_blocks=1 \
// model.encoder.hidden_init_method=params \
// model.encoder.hidden_size=64 \
// model.encoder.inner_size=128 \
// model.encoder.num_attention_heads=2 \
// model.encoder.num_layers=2 \
// model.decoder.hidden_size=64 \
// model.decoder.inner_size=128 \
// model.decoder.num_attention_heads=2 \
// model.decoder.num_layers=2 \
// model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
// model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.encoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
// model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
// trainer.gpus=[0] \
// +trainer.fast_dev_run=true \
// +trainer.limit_test_batches=2 \
// exp_manager=null \
// '
// }
// }
// stage('MIM') {
// steps {
// sh 'cd examples/nlp/machine_translation && \
// enc_dec_nmt-bottleneck.py \
// --config-path=conf \
// --config-name=aayn_bottleneck \
// do_testing=true \
// model.model_type=mim \
// model.encoder.arch=perceiver \
// model.encoder.hidden_steps=1 \
// model.encoder.hidden_blocks=1 \
// model.encoder.hidden_init_method=params \
// model.encoder.hidden_size=64 \
// model.encoder.inner_size=128 \
// model.encoder.num_attention_heads=2 \
// model.encoder.num_layers=2 \
// model.decoder.hidden_size=64 \
// model.decoder.inner_size=128 \
// model.decoder.num_attention_heads=2 \
// model.decoder.num_layers=2 \
// model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
// model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
// model.encoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
// model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/tt_tokenizer.BPE.4096.model \
// trainer.gpus=[1] \
// +trainer.fast_dev_run=true \
// +trainer.limit_test_batches=2 \
// exp_manager=null \
// '
// }
// }
// }
// }
stage('L2: TTS Fast dev runs 1') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
parallel {
stage('Tacotron 2') {
steps {
sh 'python examples/tts/tacotron2.py \
train_dataset=/home/TestData/an4_dataset/an4_train.json \
validation_datasets=/home/TestData/an4_dataset/an4_val.json \
trainer.gpus="[0]" \
+trainer.limit_train_batches=1 +trainer.limit_val_batches=1 trainer.max_epochs=1 \
trainer.accelerator=null \
model.train_ds.dataloader_params.batch_size=4 \
model.validation_ds.dataloader_params.batch_size=4 \
model.decoder.decoder_rnn_dim=256 \
model.decoder.attention_rnn_dim=1024 \
model.decoder.prenet_dim=128 \
model.postnet.postnet_n_convolutions=3 \
~trainer.check_val_every_n_epoch'
}
}
stage('WaveGlow') {
steps {
sh 'python examples/tts/waveglow.py \
train_dataset=/home/TestData/an4_dataset/an4_train.json \
validation_datasets=/home/TestData/an4_dataset/an4_val.json \
trainer.gpus="[0]" \
+trainer.limit_train_batches=1 +trainer.limit_val_batches=1 trainer.max_epochs=1 \
trainer.accelerator=null \
model.train_ds.dataloader_params.batch_size=4 \
model.validation_ds.dataloader_params.batch_size=4 \
model.waveglow.n_flows=4 \
model.waveglow.n_wn_layers=2 \
model.waveglow.n_wn_channels=32 \
~trainer.check_val_every_n_epoch'
}
}
stage('FastPitch') {
steps {
sh 'python examples/tts/fastpitch.py \
--config-name fastpitch_align \
train_dataset=/home/TestData/an4_dataset/an4_train.json \
validation_datasets=/home/TestData/an4_dataset/an4_val.json \
prior_folder=/home/TestData/an4_dataset/beta_priors \
trainer.gpus="[0]" \
+trainer.limit_train_batches=1 +trainer.limit_val_batches=1 trainer.max_epochs=1 \
trainer.accelerator=null \
model.train_ds.dataloader_params.batch_size=4 \
model.train_ds.dataloader_params.num_workers=1 \
model.validation_ds.dataloader_params.batch_size=4 \
model.validation_ds.dataloader_params.num_workers=1 \
model.symbols_embedding_dim=64 \
model.input_fft.d_inner=384 \
model.input_fft.n_layer=2 \
model.output_fft.d_inner=384 \
model.output_fft.n_layer=2 \
~trainer.check_val_every_n_epoch'
}
}
stage('Hifigan') {
steps {
sh 'python examples/tts/hifigan.py \
train_dataset=/home/TestData/an4_dataset/an4_train.json \
validation_datasets=/home/TestData/an4_dataset/an4_val.json \
trainer.gpus="[0]" \
+trainer.limit_train_batches=1 +trainer.limit_val_batches=1 +trainer.max_epochs=1 \
trainer.accelerator=null \
model.train_ds.dataloader_params.batch_size=4 \
model.train_ds.dataloader_params.num_workers=1 \
model.validation_ds.dataloader_params.batch_size=4 \
model.validation_ds.dataloader_params.num_workers=1 \
model.generator.upsample_initial_channel=64 \
+model.debug=true \
~trainer.check_val_every_n_epoch'
}
}
}
}
stage('L??: Speech Checkpoints tests') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
steps {
sh 'CUDA_VISIBLE_DEVICES=0 python examples/asr/speech_to_text_infer.py --asr_model QuartzNet15x5Base-En --dataset /home/TestData/librispeech/librivox-dev-other.json --wer_tolerance 0.1012 --batch_size 64'
}
}
}
post {
always {
sh 'chmod -R 777 .'
cleanWs()
}
}
}