| | import numpy as np |
| | import torch |
| | import random |
| |
|
| |
|
def frame_shift(mels, labels, net_pooling=4):
    """Circularly shift every sample along its last axis, labels included.

    For each sample one shift is drawn from a zero-mean Gaussian with a
    standard deviation of 90 and truncated to an integer. The features are
    rolled by that amount with ``torch.roll``; the labels are rolled by the
    same amount divided by ``net_pooling``, truncated toward zero so that a
    shift of ``-s`` moves the labels exactly as far as a shift of ``+s``,
    only in the opposite direction.

    Args:
        mels: batched tensor of features; the last dimension is rolled.
            Presumably (batch, n_bands, frames) -- confirm against callers.
        labels: batched tensor of labels aligned with ``mels`` on dim 0; the
            last dimension is rolled. Presumably its resolution is
            ``net_pooling`` times coarser than ``mels`` -- confirm.
        net_pooling: int, ratio between a shift applied to ``mels`` and the
            corresponding shift applied to ``labels``.
    Returns:
        tuple (shifted_mels, shifted_labels) of stacked torch.Tensor.
    """
    bsz = mels.shape[0]
    shifted_mels = []
    shifted_labels = []
    for bindx in range(bsz):
        shift = int(random.gauss(0, 90))
        shifted_mels.append(torch.roll(mels[bindx], shift, dims=-1))

        # Truncate toward zero. Writing `-abs(shift) // net_pooling` would
        # floor the already-negative value (unary minus binds tighter than
        # //), pushing negative shifts one label frame too far.
        label_shift = abs(shift) // net_pooling
        if shift < 0:
            label_shift = -label_shift
        shifted_labels.append(torch.roll(labels[bindx], label_shift, dims=-1))
    return torch.stack(shifted_mels), torch.stack(shifted_labels)
| |
|
| |
|
def mixup(data, target=None, alpha=0.2, beta=0.2, mixup_label_type="soft"):
    """Blend every batch item with a randomly chosen partner from the batch.

    A single mixing coefficient is sampled from ``np.random.beta(alpha, beta)``
    and one random permutation of the batch decides which item is mixed into
    which. The whole computation runs under ``torch.no_grad()``.

    Args:
        data: input tensor; must be a batch (first dimension is permuted).
        target: optional tensor of targets to mix with the same coefficient
            and permutation. If None, only the mixed data is returned.
        alpha: float, first parameter of the beta distribution.
        beta: float, second parameter of the beta distribution.
        mixup_label_type: str, 'soft' for a weighted sum of the two targets,
            'hard' for their plain sum; both are clamped to [0, 1].
    Returns:
        The mixed data, or a tuple (mixed data, mixed target) when ``target``
        is given.
    Raises:
        NotImplementedError: if ``target`` is given and ``mixup_label_type``
            is neither 'soft' nor 'hard'.
    """
    with torch.no_grad():
        n_items = data.size(0)
        lam = np.random.beta(alpha, beta)
        order = torch.randperm(n_items)

        blended = lam * data + (1 - lam) * data[order, :]

        # Without targets there is nothing more to do.
        if target is None:
            return blended

        if mixup_label_type == "soft":
            combined = lam * target + (1 - lam) * target[order, :]
        elif mixup_label_type == "hard":
            combined = target + target[order, :]
        else:
            raise NotImplementedError(
                f"mixup_label_type: {mixup_label_type} not implemented. choice in "
                f"{'soft', 'hard'}"
            )

        return blended, torch.clamp(combined, min=0, max=1)
| |
|
| |
|
def add_noise(mels, snrs=(6, 30), dims=(1, 2)):
    """Add white Gaussian noise to a batch of mel spectrograms.

    The noise standard deviation is the standard deviation of the input over
    ``dims`` divided by ``10 ** (snr / 20)``, i.e. ``snr`` is read as a
    signal-to-noise ratio in dB on amplitudes.

    Args:
        mels: torch.Tensor, batch of mel spectrograms to apply the noise to.
            The first dimension is the batch; any rank is accepted as long as
            ``dims`` is valid for it.
        snrs: int/float or list/tuple. A scalar is used as the SNR for the
            whole batch. A list/tuple of two bounds makes one SNR be drawn
            uniformly between them for every sample.
        dims: tuple, the dimensions over which the standard deviation is
            computed (defaults to (1, 2), matching a 3-D batch of
            spectrograms).
    Returns:
        torch.Tensor with the same shape as ``mels`` with noise applied.
    """
    if isinstance(snrs, (list, tuple)):
        # One SNR per sample. The trailing singleton axes follow the rank of
        # `mels` so the values broadcast against the keepdim'd sigma; a fixed
        # (-1, 1, 1) would silently add a batch-sized axis to non 3-D inputs.
        per_sample_shape = (-1,) + (1,) * (mels.dim() - 1)
        snr = (snrs[0] - snrs[1]) * torch.rand(
            (mels.shape[0],), device=mels.device
        ).reshape(per_sample_shape) + snrs[1]
    else:
        snr = snrs

    # dB -> linear amplitude ratio.
    snr = 10 ** (snr / 20)
    sigma = torch.std(mels, dim=dims, keepdim=True) / snr
    mels = mels + torch.randn(mels.shape, device=mels.device) * sigma

    return mels
| |
|