Spaces:

jonathanjordan21
/

tts-rvc-autopst

Runtime error

App Files Files Community

tts-rvc-autopst / utils.py

jonathanjordan21

Upload folder using huggingface_hub (#1)

7ce5feb verified 4 months ago

raw

history blame contribute delete

3.3 kB

	import copy
	import torch
	import numpy as np
	from scipy import signal
	from librosa.filters import mel
	from scipy.signal import get_window
	import torch
	import torch.nn as nn
	import torch.nn.functional as F


	def butter_highpass(cutoff, fs, order=5):
	    """Design a digital Butterworth high-pass filter.

	    Args:
	        cutoff: Cut-off frequency, in the same units as ``fs``.
	        fs: Sampling frequency of the signal to be filtered.
	        order: Order of the Butterworth filter.

	    Returns:
	        The ``(b, a)`` coefficient pair produced by ``scipy.signal.butter``.
	    """
	    # scipy expects the cut-off as a fraction of the Nyquist frequency.
	    normalized = cutoff / (0.5 * fs)
	    numerator, denominator = signal.butter(
	        order, normalized, btype='high', analog=False)
	    return numerator, denominator



	def pySTFT(x, fft_length=1024, hop_length=256):
	    """Compute the magnitude of a Hann-windowed short-time Fourier transform.

	    The input is reflect-padded by ``fft_length // 2`` samples, cut into
	    overlapping frames of ``fft_length`` samples spaced ``hop_length``
	    apart, windowed, and passed through a real FFT.

	    Args:
	        x: Input samples as a NumPy array.
	            NOTE(review): ``np.pad`` pads every axis, so this looks
	            intended for 1-D input -- confirm before passing batches.
	        fft_length: Frame (and FFT) size in samples.
	        hop_length: Step between consecutive frames in samples.

	    Returns:
	        For 1-D input, an array of shape
	        ``(fft_length // 2 + 1, n_frames)`` holding absolute values of the
	        FFT bins.
	    """
	    half_frame = int(fft_length // 2)
	    padded = np.pad(x, half_frame, mode='reflect')

	    overlap = fft_length - hop_length
	    n_frames = (padded.shape[-1] - overlap) // hop_length
	    sample_stride = padded.strides[-1]

	    # Zero-copy framing: each row is a window into `padded`, advanced by
	    # one hop relative to the previous row.
	    frames = np.lib.stride_tricks.as_strided(
	        padded,
	        shape=padded.shape[:-1] + (n_frames, fft_length),
	        strides=padded.strides[:-1] + (hop_length * sample_stride,
	                                       sample_stride),
	    )

	    window = get_window('hann', fft_length, fftbins=True)
	    spectrum = np.fft.rfft(window * frames, n=fft_length)

	    # Transpose so frequency bins come first, then take magnitudes.
	    return np.abs(spectrum.T)



	class LinearNorm(torch.nn.Module):
	    """Fully connected layer whose weight is Xavier-uniform initialised.

	    Args:
	        in_dim: Size of each input feature vector.
	        out_dim: Size of each output feature vector.
	        bias: Whether the wrapped ``torch.nn.Linear`` has a bias term.
	        w_init_gain: Nonlinearity name handed to
	            ``torch.nn.init.calculate_gain`` to scale the initialisation.
	    """

	    def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
	        super().__init__()
	        self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias)

	        # Replace the default weight init with a gain-scaled Xavier init.
	        gain = torch.nn.init.calculate_gain(w_init_gain)
	        torch.nn.init.xavier_uniform_(self.linear_layer.weight, gain=gain)

	    def forward(self, x):
	        """Apply the linear projection to ``x``."""
	        projected = self.linear_layer(x)
	        return projected


	class ConvNorm(torch.nn.Module):
	    """1-D convolution whose weight is Xavier-uniform initialised.

	    Args:
	        in_channels: Number of input channels.
	        out_channels: Number of output channels.
	        kernel_size: Width of the convolution kernel.
	        stride: Convolution stride.
	        padding: Padding passed to ``torch.nn.Conv1d``. When ``None`` it is
	            derived as ``dilation * (kernel_size - 1) // 2``, which requires
	            an odd ``kernel_size``.
	        dilation: Spacing between kernel taps.
	        bias: Whether the convolution has a bias term.
	        w_init_gain: Nonlinearity name handed to
	            ``torch.nn.init.calculate_gain`` to scale the initialisation.

	    Raises:
	        AssertionError: If ``padding`` is ``None`` and ``kernel_size`` is
	            not odd.
	    """

	    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
	                 padding=None, dilation=1, bias=True, w_init_gain='linear'):
	        super().__init__()
	        if padding is None:
	            # Raised explicitly instead of via `assert` so the check is not
	            # stripped under `python -O`; the exception type is kept as
	            # AssertionError for backward compatibility.
	            if kernel_size % 2 != 1:
	                raise AssertionError(
	                    'kernel_size must be odd when padding is None, '
	                    'got {}'.format(kernel_size))
	            # (kernel_size - 1) is even here, so floor division is exact.
	            padding = dilation * (kernel_size - 1) // 2

	        self.conv = torch.nn.Conv1d(in_channels, out_channels,
	                                    kernel_size=kernel_size, stride=stride,
	                                    padding=padding, dilation=dilation,
	                                    bias=bias)

	        # Replace the default weight init with a gain-scaled Xavier init.
	        torch.nn.init.xavier_uniform_(
	            self.conv.weight, gain=torch.nn.init.calculate_gain(w_init_gain))

	    def forward(self, signal):
	        """Apply the convolution to ``signal`` and return the result."""
	        return self.conv(signal)



	def filter_bank_mean(num_rep, codes_mask, max_len_long):
	    """Build a mean-pooling filterbank from per-code repetition counts.

	    Code ``l`` covers the 1-based frame indices ``i`` with
	    ``floor(start_l) < i <= ceil(end_l)``, where ``end_l`` is the cumulative
	    sum of the masked repetition counts up to and including ``l`` and
	    ``start_l`` is the cumulative sum up to ``l - 1``. Every covered frame
	    receives weight ``1 / (number of covered frames)``.

	    Args:
	        num_rep: Repetition count per code, shape ``(B, L)``.
	        codes_mask: Mask over codes, shape ``(B, L)``; padded positions must
	            be exactly zero.
	        max_len_long: Number of frames along the last output axis.

	    Returns:
	        Filterbank of shape ``(B, L, max_len_long)``, multiplied by the mask
	        so rows of masked-out codes are zero.
	    """
	    mask = codes_mask.unsqueeze(-1)          # (B, L, 1)
	    reps = num_rep.unsqueeze(-1) * mask      # (B, L, 1)

	    # Segment boundaries: each code starts where the previous one ended.
	    ends = reps.cumsum(dim=1)
	    starts = torch.cat(
	        [torch.zeros_like(ends[:, :1, :]), ends[:, :-1, :]], dim=1)
	    ends = ends.ceil()
	    starts = starts.floor()

	    frame_idx = torch.arange(
	        1, max_len_long + 1, device=reps.device).view(1, 1, -1)

	    # Positive exactly where the frame lies past the segment start ...
	    rising = frame_idx - starts
	    # ... and positive exactly where the frame has not passed its end.
	    falling = (frame_idx - (max_len_long - ends)).flip(dims=(2,))

	    # Mean pooling only needs the support of the triangular response.
	    support = (torch.min(rising, falling) > 0).float()

	    # Empty segments would divide by zero; give them a unit count instead.
	    count = support.sum(dim=-1, keepdim=True)
	    count = count.masked_fill(count == 0, 1.0)

	    return (support / count) * mask