MichaelFinkelson committed on
Commit
330bd77
·
verified ·
1 Parent(s): 7af389a

Upload avclip.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. avclip.yaml +282 -0
avclip.yaml ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ action: train_avsync_model
2
+ model:
3
+ target: model.sync_model.Synchformer
4
+ params:
5
+ afeat_extractor:
6
+ is_trainable: false
7
+ target: model.modules.feat_extractors.audio.ast.AST
8
+ params:
9
+ ckpt_path: /scratch/project_462000293/vladimir/logs/sync/avclip_models/23-12-22T16-13-38/checkpoints/epoch_e28.pt
10
+ extract_features: true
11
+ max_spec_t: 66
12
+ factorize_freq_time: true
13
+ agg_freq_module: TransformerEncoderLayer
14
+ agg_time_module: torch.nn.Identity
15
+ add_global_repr: false
16
+ vfeat_extractor:
17
+ is_trainable: false
18
+ target: model.modules.feat_extractors.visual.motionformer.MotionFormer
19
+ params:
20
+ ckpt_path: /scratch/project_462000293/vladimir/logs/sync/avclip_models/23-12-22T16-13-38/checkpoints/epoch_e28.pt
21
+ extract_features: true
22
+ factorize_space_time: true
23
+ agg_space_module: TransformerEncoderLayer
24
+ agg_time_module: torch.nn.Identity
25
+ add_global_repr: false
26
+ aproj:
27
+ target: torch.nn.Linear
28
+ params:
29
+ in_features: 768
30
+ out_features: 768
31
+ vproj:
32
+ target: torch.nn.Linear
33
+ params:
34
+ in_features: 768
35
+ out_features: 768
36
+ transformer:
37
+ target: model.sync_model.GlobalTransformer
38
+ params:
39
+ n_layer: 3
40
+ n_head: 8
41
+ n_embd: 768
42
+ tok_pdrop: 0.0
43
+ embd_pdrop: 0.1
44
+ resid_pdrop: 0.1
45
+ attn_pdrop: 0.1
46
+ pos_emb_cfg:
47
+ target: model.modules.transformer.RandInitPositionalEncoding
48
+ params:
49
+ block_shape:
50
+ - 198
51
+ n_embd: 768
52
+ off_head_cfg:
53
+ target: torch.nn.Linear
54
+ params:
55
+ in_features: 768
56
+ out_features: 21
57
+ training:
58
+ base_learning_rate: 2.0e-06
59
+ base_batch_size: 16
60
+ num_workers: 7
61
+ num_epochs: 10000
62
+ patience: 50
63
+ to_max_metric: true
64
+ metric_name: accuracy_1
65
+ early_stop_phase: valid
66
+ use_half_precision: true
67
+ seed: 1337
68
+ compile: false
69
+ skip_test: false
70
+ run_test_only: false
71
+ resume: false
72
+ finetune: false
73
+ dist_backend: nccl
74
+ max_clip_norm: 1
75
+ lr_scheduler:
76
+ name: constant_with_warmup
77
+ warmup: 1000
78
+ optimizer:
79
+ name: adam
80
+ betas:
81
+ - 0.9
82
+ - 0.999
83
+ momentum: 0.9
84
+ weight_decay: 0
85
+ local_rank: 0
86
+ global_rank: 0
87
+ world_size: 32
88
+ data:
89
+ offset_type: grid
90
+ num_off_cls: 21
91
+ prob_oos: null
92
+ max_off_sec: 2
93
+ crop_len_sec: 10
94
+ step_size_seg: 0.5625
95
+ vids_path: /scratch/project_462000293/vladimir/data/audioset/h264_video_25fps_256side_16000hz_aac/
96
+ size_before_crop: 256
97
+ input_size: 224
98
+ segment_size_vframes: 16
99
+ vfps: 25
100
+ afps: 16000
101
+ n_segments: 14
102
+ do_offset: true
103
+ p_color_jitter: 0.0
104
+ p_gray_scale: 0.0
105
+ sometimes_upscale_p: 0.0
106
+ is_spatial_crop_random: true
107
+ is_temporal_crop_random: true
108
+ audio_jitter_sec: 0.05
109
+ p_horizontal_flip: 0.5
110
+ p_audio_aug: 0.0
111
+ dataset:
112
+ target: dataset.audioset.AudioSet
113
+ params:
114
+ load_fixed_offsets_on:
115
+ - valid
116
+ - test
117
+ vis_load_backend: read_video
118
+ size_ratio: null
119
+ transform_sequence_train:
120
+ - target: dataset.transforms.EqualifyFromRight
121
+ params:
122
+ clip_max_len_sec: 10
123
+ - target: dataset.transforms.RGBSpatialCropSometimesUpscale
124
+ params:
125
+ sometimes_p: 0.0
126
+ smaller_input_size: 192
127
+ target_input_size: 224
128
+ is_random: true
129
+ - target: dataset.transforms.TemporalCropAndOffset
130
+ params:
131
+ crop_len_sec: 10
132
+ max_off_sec: 2
133
+ max_wiggle_sec: 0.05
134
+ do_offset: true
135
+ offset_type: grid
136
+ prob_oos: null
137
+ grid_size: 21
138
+ segment_size_vframes: 16
139
+ n_segments: null
140
+ step_size_seg: 0.5625
141
+ vfps: 25
142
+ - target: dataset.transforms.RandomApplyColorDistortion
143
+ params:
144
+ p_color_jitter: 0.0
145
+ s: 1.0
146
+ p_gray_scale: 0.0
147
+ - target: dataset.transforms.RandomHorizontalFlip
148
+ params:
149
+ p: 0.5
150
+ - target: dataset.transforms.AudioRandomReverb
151
+ params:
152
+ p: 0.0
153
+ - target: dataset.transforms.AudioRandomVolume
154
+ params:
155
+ p: 0.0
156
+ gain: 2.0
157
+ gain_type: amplitude
158
+ - target: dataset.transforms.AudioRandomPitchShift
159
+ params:
160
+ p: 0.0
161
+ shift: 1000
162
+ - target: dataset.transforms.AudioRandomLowpassFilter
163
+ params:
164
+ p: 0.0
165
+ cutoff_freq: 100
166
+ - target: dataset.transforms.AudioRandomGaussNoise
167
+ params:
168
+ p: 0.0
169
+ amplitude: 0.01
170
+ - target: dataset.transforms.GenerateMultipleSegments
171
+ params:
172
+ segment_size_vframes: 16
173
+ n_segments: null
174
+ is_start_random: true
175
+ step_size_seg: 0.5625
176
+ - target: dataset.transforms.RGBToHalfToZeroOne
177
+ - target: dataset.transforms.RGBNormalize
178
+ params:
179
+ mean:
180
+ - 0.5
181
+ - 0.5
182
+ - 0.5
183
+ std:
184
+ - 0.5
185
+ - 0.5
186
+ - 0.5
187
+ - target: dataset.transforms.AudioMelSpectrogram
188
+ params:
189
+ sample_rate: 16000
190
+ win_length: 400
191
+ hop_length: 160
192
+ n_fft: 1024
193
+ n_mels: 128
194
+ - target: dataset.transforms.AudioLog
195
+ - target: dataset.transforms.PadOrTruncate
196
+ params:
197
+ max_spec_t: 66
198
+ - target: dataset.transforms.AudioNormalizeAST
199
+ params:
200
+ mean: -4.2677393
201
+ std: 4.5689974
202
+ - target: dataset.transforms.PermuteStreams
203
+ params:
204
+ einops_order_audio: S F T -> S 1 F T
205
+ einops_order_rgb: S T C H W -> S T C H W
206
+ transform_sequence_test:
207
+ - target: dataset.transforms.EqualifyFromRight
208
+ - target: dataset.transforms.RGBSpatialCrop
209
+ params:
210
+ input_size: 224
211
+ is_random: false
212
+ - target: dataset.transforms.TemporalCropAndOffset
213
+ params:
214
+ crop_len_sec: 10
215
+ max_off_sec: 2
216
+ max_wiggle_sec: 0.0
217
+ do_offset: true
218
+ grid_size: 21
219
+ offset_type: grid
220
+ prob_oos: null
221
+ segment_size_vframes: 16
222
+ n_segments: null
223
+ step_size_seg: 0.5625
224
+ vfps: 25
225
+ - target: dataset.transforms.GenerateMultipleSegments
226
+ params:
227
+ segment_size_vframes: 16
228
+ n_segments: null
229
+ is_start_random: false
230
+ step_size_seg: 0.5625
231
+ - target: dataset.transforms.RGBToHalfToZeroOne
232
+ - target: dataset.transforms.RGBNormalize
233
+ params:
234
+ mean:
235
+ - 0.5
236
+ - 0.5
237
+ - 0.5
238
+ std:
239
+ - 0.5
240
+ - 0.5
241
+ - 0.5
242
+ - target: dataset.transforms.AudioMelSpectrogram
243
+ params:
244
+ sample_rate: 16000
245
+ win_length: 400
246
+ hop_length: 160
247
+ n_fft: 1024
248
+ n_mels: 128
249
+ - target: dataset.transforms.AudioLog
250
+ - target: dataset.transforms.PadOrTruncate
251
+ params:
252
+ max_spec_t: 66
253
+ - target: dataset.transforms.AudioNormalizeAST
254
+ params:
255
+ mean: -4.2677393
256
+ std: 4.5689974
257
+ - target: dataset.transforms.PermuteStreams
258
+ params:
259
+ einops_order_audio: S F T -> S 1 F T
260
+ einops_order_rgb: S T C H W -> S T C H W
261
+ logging:
262
+ logdir: /scratch/project_462000293/vladimir/logs/sync/sync_models/
263
+ log_code_state: true
264
+ log_frequency: 20
265
+ patterns_to_ignore:
266
+ - logs
267
+ - .git
268
+ - __pycache__
269
+ - data
270
+ - '*.pt'
271
+ - sbatch_logs
272
+ - '*.mp4'
273
+ - '*.wav'
274
+ - '*.jpg'
275
+ - '*.gif'
276
+ - misc*
277
+ vis_segment_sim: true
278
+ log_max_items: 500000
279
+ use_wandb: true
280
+ start_time: 24-01-04T16-39-21
281
+ config: ./configs/sync.yaml
282
+ ckpt_path: /scratch/project_462000293/vladimir/logs/sync/sync_models/24-01-04T16-39-21/24-01-04T16-39-21.pt