---
# Model and data-augmentation configuration for TSED_AS_filter.
# NOTE(review): this file was recovered from a copy that had been collapsed
# onto a single line, which lost all indentation. The nesting below is
# reconstructed from key order — confirm it against the code that reads
# this config before relying on it.
model_name: TSED_AS_filter

encoder:
  target_length: 1008
  patch_size: [64, 4]
  patch_stride: [64, 4]
  group_masking: true
  embed_dim: 768
  depth: 12
  num_heads: 12

decoder:
  embed_dim: 768
  depth: 6
  num_heads: 12
  # NOTE(review): cls_dim and fusion are assumed to belong to the decoder —
  # TODO confirm.
  cls_dim: 512
  fusion: adaln

# NOTE(review): the four keys below are assumed to be top-level — TODO confirm.
ft_blocks: [6, 7, 8, 9, 10, 11]
frozen_encoder: false
net_pooling: 4
sr: 16000

data_aug:
  time_mask_ratios: [5, 20]
  # Hyperparameters for data augmentations that do not alter the label
  # information.
  # NOTE(review): assumed to be nested under data_aug — TODO confirm.
  transform:
    # 0: none of the augmentations below is applied.
    # 1: the same augmentation is applied to the student and teacher
    #    model inputs.
    # 2: different augmentations are applied to the student and teacher
    #    model inputs.
    n_transform: 2
    # Which augmentations to apply: [FilterAugment, freq_mask, add_noise].
    choice: [1, 0, 0]
    # dB range of FilterAugment to be applied on each band.
    filter_db_range: [-4.5, 6]
    # Range of the number of frequency bands in FilterAugment.
    filter_bands: [2, 5]
    filter_minimum_bandwidth: 4
    filter_type: step
    # Maximum ratio of the frequency masking range: at most 1/16 of the
    # total number of frequencies will be masked.
    freq_mask_ratio: 16
    # SNR of the original signal with respect to the added noise.
    noise_snrs: [35, 40]

# `!include` is a custom tag: it only resolves when the loader registers a
# constructor for it.
opt: !include opt_config.yml
data: !include data_config.yml