|
from typing import List, Optional

from transformers import PretrainedConfig
|
|
|
|
|
class DinoHuVitsConfig(PretrainedConfig):
    """Configuration object holding the hyperparameters of a ``DinoHuVits`` model.

    Every constructor argument is stored as an instance attribute of the same
    name. Any additional keyword arguments are forwarded untouched to
    ``PretrainedConfig.__init__``.

    NOTE(review): the meaning of each hyperparameter is defined by the model
    code that consumes this config, which is not visible in this file. The
    descriptions below only state what this class itself does with them.

    Args:
        inter_channels: Stored as ``self.inter_channels``. Defaults to 192.
        hidden_channels: Stored as ``self.hidden_channels``. Defaults to 192.
        resblock: Stored as ``self.resblock``. Defaults to ``"1"``.
        resblock_kernel_sizes: Stored as ``self.resblock_kernel_sizes``.
            ``None`` selects a fresh ``[3, 7, 11]``.
        resblock_dilation_sizes: Stored as ``self.resblock_dilation_sizes``.
            ``None`` selects a fresh ``[[1, 3, 5], [1, 3, 5], [1, 3, 5]]``.
        upsample_rates: Stored as ``self.upsample_rates``.
            ``None`` selects a fresh ``[10, 8, 2, 2]``.
        upsample_initial_channel: Stored as ``self.upsample_initial_channel``.
            Defaults to 512.
        upsample_kernel_sizes: Stored as ``self.upsample_kernel_sizes``.
            ``None`` selects a fresh ``[20, 16, 4, 4]``.
        gin_channels: Stored as ``self.gin_channels``. Defaults to 256.
        hubert_feature_channels: Stored as ``self.hubert_feature_channels``.
            Defaults to 768.
        hubert_downsample_channels: Stored as
            ``self.hubert_downsample_channels``. Defaults to 192.
        hubert_output_layer: Stored as ``self.hubert_output_layer``.
            Defaults to 11.
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "DinoHuVits"

    def __init__(
        self,
        inter_channels: int = 192,
        hidden_channels: int = 192,
        resblock: str = "1",
        resblock_kernel_sizes: Optional[List[int]] = None,
        resblock_dilation_sizes: Optional[List[List[int]]] = None,
        upsample_rates: Optional[List[int]] = None,
        upsample_initial_channel: int = 512,
        upsample_kernel_sizes: Optional[List[int]] = None,
        gin_channels: int = 256,
        hubert_feature_channels: int = 768,
        hubert_downsample_channels: int = 192,
        hubert_output_layer: int = 11,
        **kwargs
    ):
        # List-valued defaults are built here, per call, instead of living in
        # the signature: a list literal in the signature is created once and
        # would be shared (and mutable) across every default-constructed
        # config instance.
        if resblock_kernel_sizes is None:
            resblock_kernel_sizes = [3, 7, 11]
        if resblock_dilation_sizes is None:
            resblock_dilation_sizes = [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
        if upsample_rates is None:
            upsample_rates = [10, 8, 2, 2]
        if upsample_kernel_sizes is None:
            upsample_kernel_sizes = [20, 16, 4, 4]

        self.inter_channels = inter_channels
        self.hidden_channels = hidden_channels
        self.resblock = resblock
        self.resblock_kernel_sizes = resblock_kernel_sizes
        self.resblock_dilation_sizes = resblock_dilation_sizes
        self.upsample_rates = upsample_rates
        self.upsample_initial_channel = upsample_initial_channel
        self.upsample_kernel_sizes = upsample_kernel_sizes
        self.gin_channels = gin_channels

        self.hubert_feature_channels = hubert_feature_channels
        self.hubert_downsample_channels = hubert_downsample_channels
        self.hubert_output_layer = hubert_output_layer

        # Model-specific attributes are set before delegating the remaining
        # keyword arguments to the base class, as in the original ordering.
        super().__init__(**kwargs)
|
|