126 lines
4.1 KiB
Python
126 lines
4.1 KiB
Python
# BSD 3-Clause License
|
|
|
|
# Copyright (c) 2018-2020, NVIDIA Corporation
|
|
# All rights reserved.
|
|
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are met:
|
|
|
|
# * Redistributions of source code must retain the above copyright notice, this
|
|
# list of conditions and the following disclaimer.
|
|
|
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
# this list of conditions and the following disclaimer in the documentation
|
|
# and/or other materials provided with the distribution.
|
|
|
|
# * Neither the name of the copyright holder nor the names of its
|
|
# contributors may be used to endorse or promote products derived from
|
|
# this software without specific prior written permission.
|
|
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
"""https://github.com/NVIDIA/tacotron2"""
|
|
|
|
import torch
|
|
import numpy as np
|
|
from scipy.signal import get_window
|
|
import librosa.util as librosa_util
|
|
|
|
|
|
def window_sumsquare(window, n_frames, hop_length=200, win_length=800,
                     n_fft=800, dtype=np.float32, norm=None):
    """
    # from librosa 0.6
    Compute the sum-square envelope of a window function at a given hop length.

    This is used to estimate modulation effects induced by windowing
    observations in short-time fourier transforms.

    Parameters
    ----------
    window : string, tuple, number, callable, or list-like
        Window specification, as in `get_window`

    n_frames : int > 0
        The number of analysis frames

    hop_length : int > 0
        The number of samples to advance between frames

    win_length : int > 0 or None [optional]
        The length of the window function. When `None` is passed, `n_fft`
        is used instead.

    n_fft : int > 0
        The length of each analysis frame.

    dtype : np.dtype
        The data type of the output

    norm : [optional]
        Forwarded unchanged as the `norm` argument of
        `librosa.util.normalize`, which is applied to the window before it
        is squared.

    Returns
    -------
    wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
        The sum-squared envelope of the window function
    """
    if win_length is None:
        win_length = n_fft

    n = n_fft + hop_length * (n_frames - 1)
    x = np.zeros(n, dtype=dtype)

    # Compute the squared window at the desired length
    win_sq = get_window(window, win_length, fftbins=True)
    win_sq = librosa_util.normalize(win_sq, norm=norm) ** 2
    # Pass `size` by keyword: newer librosa releases accept it only as a
    # keyword argument, while older releases accept the keyword form too.
    win_sq = librosa_util.pad_center(win_sq, size=n_fft)

    # Fill the envelope: overlap-add the squared window once per frame.
    for i in range(n_frames):
        sample = i * hop_length
        # The min/max clamps keep both slices inside the output buffer.
        stop = min(n, sample + n_fft)
        x[sample:stop] += win_sq[:max(0, min(n_fft, n - sample))]
    return x
|
|
|
|
|
|
def griffin_lim(magnitudes, stft_fn, n_iters=30):
    """
    Reconstruct a signal from spectrogram magnitudes by iterative phase
    re-estimation.

    The phase is initialised with random angles. Each iteration then runs
    `stft_fn.transform` on the current signal, keeps only the returned
    angles, and re-synthesises with `stft_fn.inverse` using the original
    `magnitudes`.

    PARAMS
    ------
    magnitudes: spectrogram magnitudes (torch tensor)
    stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods
    n_iters: number of transform/inverse rounds run after the initial
        inverse; 0 returns the signal built from the random phase

    RETURNS
    -------
    signal: result of `stft_fn.inverse(...)` with dimension 1 squeezed
    """
    # Random initial phase; np.angle wraps 2*pi*U[0, 1) into (-pi, pi].
    angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size())))
    angles = angles.astype(np.float32)
    # Plain tensors replace the deprecated autograd.Variable wrapper. The
    # angles are moved next to `magnitudes` so a non-CPU input does not hit a
    # device mismatch inside `stft_fn.inverse`.
    angles = torch.from_numpy(angles).to(magnitudes.device)
    signal = stft_fn.inverse(magnitudes, angles).squeeze(1)

    for _ in range(n_iters):
        # Only the re-estimated phase is kept; magnitudes stay fixed.
        _unused_magnitudes, angles = stft_fn.transform(signal)
        signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
    return signal
|
|
|
|
|
|
def dynamic_range_compression(x, C=1, clip_val=1e-5):
    """
    Log-compress `x` after flooring it at `clip_val`.

    PARAMS
    ------
    x: tensor to compress
    C: compression factor, multiplied in before the logarithm
    clip_val: lower bound applied to `x` so the logarithm never sees
        values below it
    """
    floored = torch.clamp(x, min=clip_val)
    scaled = floored * C
    return torch.log(scaled)
|
|
|
|
|
|
def dynamic_range_decompression(x, C=1):
    """
    Undo log compression: exponentiate `x`, then divide by `C`.

    PARAMS
    ------
    x: log-compressed tensor
    C: compression factor used to compress
    """
    expanded = torch.exp(x)
    return expanded / C
|