DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/preprocess_audio2mel.py
Przemek Strzelczyk 0663b67c1a Updating models
2019-07-08 22:51:28 +02:00

65 lines
2.6 KiB
Python

import argparse
import torch
from tacotron2.data_function import TextMelLoader
from common.utils import load_filepaths_and_text
def parse_args(parser):
    """
    Register the audio-to-mel preprocessing options on ``parser``.

    The options cover the dataset location, the two required filelists
    (audio paths and mel paths), the text cleaners, and the audio / STFT /
    mel settings. The same parser object is returned so that the call can
    be chained.
    """
    # One (flags, add_argument keyword options) pair per option, listed in
    # the order in which they are registered on the parser.
    option_specs = (
        (('-d', '--dataset-path'),
         dict(type=str, default='./', help='Path to dataset')),
        (('--wav-files',),
         dict(required=True, type=str,
              help='Path to filelist with audio paths and text')),
        (('--mel-files',),
         dict(required=True, type=str,
              help='Path to filelist with mel paths and text')),
        (('--text-cleaners',),
         dict(nargs='*', default=['english_cleaners'], type=str,
              help='Type of text cleaners for input text')),
        (('--max-wav-value',),
         dict(default=32768.0, type=float, help='Maximum audiowave value')),
        (('--sampling-rate',),
         dict(default=22050, type=int, help='Sampling rate')),
        (('--filter-length',),
         dict(default=1024, type=int, help='Filter length')),
        (('--hop-length',),
         dict(default=256, type=int, help='Hop (stride) length')),
        (('--win-length',),
         dict(default=1024, type=int, help='Window length')),
        (('--mel-fmin',),
         dict(default=0.0, type=float, help='Minimum mel frequency')),
        (('--mel-fmax',),
         dict(default=8000.0, type=float, help='Maximum mel frequency')),
        (('--n-mel-channels',),
         dict(default=80, type=int,
              help='Number of bins in mel-spectrograms')),
    )

    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    return parser
def audio2mel(dataset_path, audiopaths_and_text, melpaths_and_text, args):
    """
    Compute a mel-spectrogram for every listed audio file and save it.

    The two filelists are paired entry by entry: the mel obtained from the
    first field of the i-th audio entry is written with ``torch.save`` to the
    path in the first field of the i-th mel entry.

    Args:
        dataset_path: Dataset location, forwarded to
            ``load_filepaths_and_text`` and ``TextMelLoader``.
        audiopaths_and_text: Path to the filelist with audio paths and text.
        melpaths_and_text: Path to the filelist with mel paths and text.
        args: Parsed command-line options, forwarded to ``TextMelLoader``.

    Raises:
        ValueError: If the two filelists do not have the same number of
            entries, since they could not be paired one-to-one.
    """
    melpaths_and_text_list = load_filepaths_and_text(dataset_path, melpaths_and_text)
    audiopaths_and_text_list = load_filepaths_and_text(dataset_path, audiopaths_and_text)

    total = len(melpaths_and_text_list)
    # Fail before any work is done: a length mismatch would otherwise either
    # skip audio entries silently or crash mid-run with an IndexError after
    # part of the output has already been written.
    if len(audiopaths_and_text_list) != total:
        raise ValueError(
            "Filelist length mismatch: {} audio entries vs {} mel entries".format(
                len(audiopaths_and_text_list), total))

    data_loader = TextMelLoader(dataset_path, audiopaths_and_text, args)

    paired_entries = zip(audiopaths_and_text_list, melpaths_and_text_list)
    for i, (audio_entry, mel_entry) in enumerate(paired_entries):
        # Progress report every 100 files.
        if i % 100 == 0:
            print("done", i, "/", total)

        mel = data_loader.get_mel(audio_entry[0])
        torch.save(mel, mel_entry[0])
def main():
    """
    Command-line entry point: parse the options and convert audio to mels.
    """
    # The description is what `--help` prints; it names the preprocessing
    # task this script performs rather than training.
    parser = argparse.ArgumentParser(
        description='PyTorch Tacotron 2 Audio-to-Mel Preprocessing')
    parser = parse_args(parser)
    args = parser.parse_args()

    # Not a command-line option: set explicitly before the args are handed to
    # TextMelLoader inside audio2mel. Presumably this makes the loader compute
    # mels from the audio instead of reading saved ones -- confirm against
    # TextMelLoader.
    args.load_mel_from_disk = False

    audio2mel(args.dataset_path, args.wav_files, args.mel_files, args)
# Run the preprocessing only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()