# Name of this model configuration.
# NOTE(review): how the loading code uses this value is not visible here.
model_name: TSED_AS_filter

# Encoder hyperparameters. The exact meaning of each value is defined by the
# code that loads this file, which is not visible here.
encoder:
  # NOTE(review): presumably the input length in frames -- confirm.
  target_length: 1008
  # NOTE(review): axis order of the two entries is not stated here -- confirm.
  patch_size: [64, 4]
  # Same values as patch_size; presumably non-overlapping patches -- confirm.
  patch_stride: [64, 4]
  # Canonical lowercase boolean, same spelling as frozen_encoder below.
  group_masking: true
  embed_dim: 768
  depth: 12
  num_heads: 12

# Decoder hyperparameters. The exact meaning of each value is defined by the
# code that loads this file, which is not visible here.
decoder:
  # Same width as encoder.embed_dim (768).
  embed_dim: 768
  depth: 6
  num_heads: 12
  # NOTE(review): presumably the width of a class/conditioning embedding -- confirm.
  cls_dim: 512
  # NOTE(review): "adaln" presumably selects adaptive layer-norm fusion -- confirm.
  fusion: adaln

# NOTE(review): presumably the indices of the encoder blocks to fine-tune;
# with encoder.depth 12 these would be the last six blocks -- confirm.
ft_blocks: [6, 7, 8, 9, 10, 11]
frozen_encoder: false

# NOTE(review): presumably a pooling factor applied by the network -- confirm.
net_pooling: 4
# NOTE(review): presumably the audio sample rate in Hz -- confirm.
sr: 16000

# Data augmentation settings.
data_aug:
  time_mask_ratios: [5, 20]
  # Hyperparameters for data augmentations that do not alter the label
  # information.
  transform:
    # 0: none of the augmentations below is applied.
    # 1: the same augmentation is applied to the student and teacher model
    #    inputs.
    # 2: different augmentations are applied to the student and teacher model
    #    inputs.
    n_transform: 2
    # Selects which augmentations are applied, in the order
    # [FilterAugment, freq_mask, add_noise].
    choice: [1, 0, 0]
    # dB range that FilterAugment applies to each band.
    filter_db_range: [-4.5, 6]
    # Range of the number of frequency bands used by FilterAugment.
    filter_bands: [2, 5]
    filter_minimum_bandwidth: 4
    filter_type: step
    # Maximum frequency-masking ratio: at most 1/16 of the total number of
    # frequency bins is masked.
    freq_mask_ratio: 16
    # SNR of the original signal with respect to the added noise.
    noise_snrs: [35, 40]

# `!include` is a custom (non-standard) YAML tag: this file only parses with a
# loader that registers a constructor for it.
# NOTE(review): the paths are presumably resolved relative to this file --
# confirm against the loader.
opt: !include opt_config.yml
data: !include data_config.yml