| from transformers import PretrainedConfig | |
| from typing import List | |
class DinoHuVitsConfig(PretrainedConfig):
    """Configuration container for the ``DinoHuVits`` model.

    Each named constructor argument is stored on the instance under an
    attribute of the same name. Any additional keyword arguments are
    forwarded to ``PretrainedConfig.__init__``.

    The four list-valued arguments default to ``None``; when left as
    ``None`` a brand-new list holding the default values is created for
    that instance. This keeps separate configs from sharing (and
    accidentally mutating) one default list object.

    Args:
        inter_channels: Stored as ``self.inter_channels``. Default ``192``.
        hidden_channels: Stored as ``self.hidden_channels``. Default ``192``.
        resblock: Stored as ``self.resblock``. Default ``"1"``.
        resblock_kernel_sizes: Stored as ``self.resblock_kernel_sizes``.
            ``None`` selects ``[3, 7, 11]``.
        resblock_dilation_sizes: Stored as ``self.resblock_dilation_sizes``.
            ``None`` selects ``[[1, 3, 5], [1, 3, 5], [1, 3, 5]]``.
        upsample_rates: Stored as ``self.upsample_rates``.
            ``None`` selects ``[10, 8, 2, 2]``.
        upsample_initial_channel: Stored as
            ``self.upsample_initial_channel``. Default ``512``.
        upsample_kernel_sizes: Stored as ``self.upsample_kernel_sizes``.
            ``None`` selects ``[20, 16, 4, 4]``.
        gin_channels: Stored as ``self.gin_channels``. Default ``256``.
        hubert_feature_channels: Stored as ``self.hubert_feature_channels``.
            Default ``768``.
        hubert_downsample_channels: Stored as
            ``self.hubert_downsample_channels``. Default ``192``.
        hubert_output_layer: Stored as ``self.hubert_output_layer``.
            Default ``11``.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    model_type = "DinoHuVits"

    def __init__(
        self,
        inter_channels=192,
        hidden_channels=192,
        resblock="1",
        resblock_kernel_sizes=None,
        resblock_dilation_sizes=None,
        upsample_rates=None,
        upsample_initial_channel=512,
        upsample_kernel_sizes=None,
        gin_channels=256,
        hubert_feature_channels=768,
        hubert_downsample_channels=192,
        hubert_output_layer=11,
        **kwargs
    ):
        # Build the list defaults per call: a list literal in the signature
        # would be a single object shared by every instance using the default.
        if resblock_kernel_sizes is None:
            resblock_kernel_sizes = [3, 7, 11]
        if resblock_dilation_sizes is None:
            resblock_dilation_sizes = [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
        if upsample_rates is None:
            upsample_rates = [10, 8, 2, 2]
        if upsample_kernel_sizes is None:
            upsample_kernel_sizes = [20, 16, 4, 4]

        self.inter_channels = inter_channels
        self.hidden_channels = hidden_channels
        self.resblock = resblock
        self.resblock_kernel_sizes = resblock_kernel_sizes
        self.resblock_dilation_sizes = resblock_dilation_sizes
        self.upsample_rates = upsample_rates
        self.upsample_initial_channel = upsample_initial_channel
        self.upsample_kernel_sizes = upsample_kernel_sizes
        self.gin_channels = gin_channels
        self.hubert_feature_channels = hubert_feature_channels
        self.hubert_downsample_channels = hubert_downsample_channels
        self.hubert_output_layer = hubert_output_layer

        # The model-specific fields are assigned before delegating the
        # remaining keyword arguments to the base class, as in the original.
        super().__init__(**kwargs)