Text-to-Music / tests /models /test_multibanddiffusion.py
reach-vb's picture
reach-vb HF staff
Stereo demo update (#60)
5325fcc
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import random
import numpy as np
import torch
from audiocraft.models.multibanddiffusion import MultiBandDiffusion, DiffusionProcess
from audiocraft.models import EncodecModel, DiffusionUnet
from audiocraft.modules import SEANetEncoder, SEANetDecoder
from audiocraft.modules.diffusion_schedule import NoiseSchedule
from audiocraft.quantization import DummyQuantizer
class TestMBD:
def _create_mbd(self,
sample_rate: int,
channels: int,
n_filters: int = 3,
n_residual_layers: int = 1,
ratios: list = [5, 4, 3, 2],
num_steps: int = 1000,
codec_dim: int = 128,
**kwargs):
frame_rate = np.prod(ratios)
encoder = SEANetEncoder(channels=channels, dimension=codec_dim, n_filters=n_filters,
n_residual_layers=n_residual_layers, ratios=ratios)
decoder = SEANetDecoder(channels=channels, dimension=codec_dim, n_filters=n_filters,
n_residual_layers=n_residual_layers, ratios=ratios)
quantizer = DummyQuantizer()
compression_model = EncodecModel(encoder, decoder, quantizer, frame_rate=frame_rate,
sample_rate=sample_rate, channels=channels, **kwargs)
diffusion_model = DiffusionUnet(chin=channels, num_steps=num_steps, codec_dim=codec_dim)
schedule = NoiseSchedule(device='cpu', num_steps=num_steps)
DP = DiffusionProcess(model=diffusion_model, noise_schedule=schedule)
mbd = MultiBandDiffusion(DPs=[DP], codec_model=compression_model)
return mbd
def test_model(self):
random.seed(1234)
sample_rate = 24_000
channels = 1
codec_dim = 128
mbd = self._create_mbd(sample_rate=sample_rate, channels=channels, codec_dim=codec_dim)
for _ in range(10):
length = random.randrange(1, 10_000)
x = torch.randn(2, channels, length)
res = mbd.regenerate(x, sample_rate)
assert res.shape == x.shape