wenzhengzeng commited on
Commit
c1defad
·
1 Parent(s): fcd8077

first commit

Browse files
Files changed (43) hide show
  1. .gitattributes +1 -0
  2. README.md +31 -0
  3. d2vlm/added_tokens.json +3 -0
  4. d2vlm/config.json +3 -0
  5. d2vlm/etbench/.nfs000000005094ac8a000002ba +8 -0
  6. d2vlm/etbench/etbench_0.json +3 -0
  7. d2vlm/etbench/etbench_1.json +3 -0
  8. d2vlm/etbench/etbench_2.json +3 -0
  9. d2vlm/etbench/etbench_3.json +3 -0
  10. d2vlm/etbench/etbench_4.json +3 -0
  11. d2vlm/etbench/etbench_5.json +3 -0
  12. d2vlm/etbench/metrics2_hpc.log +77 -0
  13. d2vlm/generation_config.json +3 -0
  14. d2vlm/model-00001-of-00003.safetensors +3 -0
  15. d2vlm/model-00002-of-00003.safetensors +3 -0
  16. d2vlm/model-00003-of-00003.safetensors +3 -0
  17. d2vlm/model.safetensors.index.json +3 -0
  18. d2vlm/runs/Mar05_05-54-49_hopper-03/events.out.tfevents.1741125477.hopper-03.2084745.0 +3 -0
  19. d2vlm/special_tokens_map.json +3 -0
  20. d2vlm/tokenizer.json +3 -0
  21. d2vlm/tokenizer.model +3 -0
  22. d2vlm/tokenizer_config.json +3 -0
  23. d2vlm/trainer_state.json +3 -0
  24. d2vlm/training_args.bin +3 -0
  25. d2vlm_mcqa_enhanced/added_tokens.json +3 -0
  26. d2vlm_mcqa_enhanced/config.json +3 -0
  27. d2vlm_mcqa_enhanced/etbench/etbench_0.json +3 -0
  28. d2vlm_mcqa_enhanced/etbench/etbench_1.json +3 -0
  29. d2vlm_mcqa_enhanced/etbench/etbench_2.json +3 -0
  30. d2vlm_mcqa_enhanced/etbench/eval_results/metrics.json +3 -0
  31. d2vlm_mcqa_enhanced/etbench/metrics2.log +74 -0
  32. d2vlm_mcqa_enhanced/generation_config.json +3 -0
  33. d2vlm_mcqa_enhanced/model-00001-of-00003.safetensors +3 -0
  34. d2vlm_mcqa_enhanced/model-00002-of-00003.safetensors +3 -0
  35. d2vlm_mcqa_enhanced/model-00003-of-00003.safetensors +3 -0
  36. d2vlm_mcqa_enhanced/model.safetensors.index.json +3 -0
  37. d2vlm_mcqa_enhanced/runs/May15_02-13-20_hopper-04/events.out.tfevents.1747246538.hopper-04.477597.0 +3 -0
  38. d2vlm_mcqa_enhanced/special_tokens_map.json +3 -0
  39. d2vlm_mcqa_enhanced/tokenizer.json +3 -0
  40. d2vlm_mcqa_enhanced/tokenizer.model +3 -0
  41. d2vlm_mcqa_enhanced/tokenizer_config.json +3 -0
  42. d2vlm_mcqa_enhanced/trainer_state.json +3 -0
  43. d2vlm_mcqa_enhanced/training_args.bin +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,34 @@
1
  ---
2
  license: apache-2.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+
5
+
6
+ # D2VLM Models
7
+
8
+ Here we provide the pre-trained D2VLM models. Their performance on E.T. Bench is shown below.
9
+
10
+ | Model Name | Referring (Acc) | Grounding (F1) | Dense Captioning (F1) | Dense Captioning (Sim) | Complex (Recall) |
11
+ |---------------------|:---------------:|:--------------:|:---------------------:|:----------------------:|:----------------:|
12
+ | D2VLM | 25.3 | 42.3 | 37.5 | 21.8 | 18.1 |
13
+ | D2VLM_mcqa_enhanced | 38.3 | 44.3 | 37.2 | 21.4 | 18.6 |
14
+
15
+
16
+ ## Some Notes
17
+ 1. For the Referring tasks of E.T. Bench (RAR/EVC/RVQ), we adopt a more stringent evaluation protocol than the original E.T. Bench, which usually results in lower metric values (e.g., a drop of more than 10% for some existing methods when evaluated with our stricter metrics).
18
+
19
+ 2. To enhance basic instruction-following capability, we incorporate
20
+ automatically constructed multiple-choice questions during
21
+ the proposed factorized preference optimization process.
22
+ Thanks to our proposed factorized preference data synthesis,
23
+ we can easily generate diverse distractor options based on
24
+ different causes of failure and combine them with the original
25
+ correct answer to form multiple-choice questions, without
26
+ requiring additional external data sources. We refer to the resulting model as "D2VLM_mcqa_enhanced".
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+
d2vlm/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bea32e7248311c2903aa168994db8221c8e03b77f66d909312e20a3137dae8b8
3
+ size 330
d2vlm/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15231c47ebb160699c0445e7e367930e93e29280ac8e066c264eabcad2594873
3
+ size 1757
d2vlm/etbench/.nfs000000005094ac8a000002ba ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Total number of files: 6
2
+ Loading /storage/wenzheng/model_zoo/0306_final_important/dpo_focused_with_only_dpo_rvc_1loss_1/etbench/etbench_5.json...
3
+ Loading /storage/wenzheng/model_zoo/0306_final_important/dpo_focused_with_only_dpo_rvc_1loss_1/etbench/etbench_2.json...
4
+ Loading /storage/wenzheng/model_zoo/0306_final_important/dpo_focused_with_only_dpo_rvc_1loss_1/etbench/etbench_0.json...
5
+ Loading /storage/wenzheng/model_zoo/0306_final_important/dpo_focused_with_only_dpo_rvc_1loss_1/etbench/etbench_4.json...
6
+ Loading /storage/wenzheng/model_zoo/0306_final_important/dpo_focused_with_only_dpo_rvc_1loss_1/etbench/etbench_1.json...
7
+ Loading /storage/wenzheng/model_zoo/0306_final_important/dpo_focused_with_only_dpo_rvc_1loss_1/etbench/etbench_3.json...
8
+ Total number of samples: 7289
d2vlm/etbench/etbench_0.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65dabafaf8391d39ef97ccab249743d90fb94d34620ffb24178bba10205af842
3
+ size 895502
d2vlm/etbench/etbench_1.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fe343c13be452e7bed73e732a174acd4644e7f3a249f8b95960ae5e59a8af24
3
+ size 911347
d2vlm/etbench/etbench_2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a8f909c0e328c7c0023454d02a2dc353f4f1c8edaca6aa7b8d7508bfc7075ae
3
+ size 897414
d2vlm/etbench/etbench_3.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b56352aef41c6b01ada8a13bbf37bc7f1ba04b9e5e6f16cd4992f97bc692f037
3
+ size 903885
d2vlm/etbench/etbench_4.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24cf220bef1cb1bf5ae11c09b6f62b47db0d5e263e16ad84ab600ec362d100e5
3
+ size 905938
d2vlm/etbench/etbench_5.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:886b27391a3d67458bf47ce5e9e194b8b59b4e16b870d9b43104f1bdd00eca0f
3
+ size 901487
d2vlm/etbench/metrics2_hpc.log ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Total number of files: 6
2
+ Loading /hpctmp/e1374476/etbench/work_dirs_202503_v3_dpo_abla/dpo_focused_with_only_dpo_rvc_1loss_1/etbench/etbench_2.json...
3
+ Loading /hpctmp/e1374476/etbench/work_dirs_202503_v3_dpo_abla/dpo_focused_with_only_dpo_rvc_1loss_1/etbench/etbench_5.json...
4
+ Loading /hpctmp/e1374476/etbench/work_dirs_202503_v3_dpo_abla/dpo_focused_with_only_dpo_rvc_1loss_1/etbench/etbench_3.json...
5
+ Loading /hpctmp/e1374476/etbench/work_dirs_202503_v3_dpo_abla/dpo_focused_with_only_dpo_rvc_1loss_1/etbench/etbench_0.json...
6
+ Loading /hpctmp/e1374476/etbench/work_dirs_202503_v3_dpo_abla/dpo_focused_with_only_dpo_rvc_1loss_1/etbench/etbench_1.json...
7
+ Loading /hpctmp/e1374476/etbench/work_dirs_202503_v3_dpo_abla/dpo_focused_with_only_dpo_rvc_1loss_1/etbench/etbench_4.json...
8
+ Total number of samples: 7289
9
+ ==========================================
10
+
11
+ Referring
12
+
13
+ ---- --------------- ----- ------ -----
14
+ Task Source Total Failed Acc
15
+ rar perception_test 500 0 0.22
16
+ eca charades_sta 500 0 0.324
17
+ rvq star 500 0 0.214
18
+ ---- --------------- ----- ------ -----
19
+ Mean Acc: 0.25267
20
+
21
+ Grounding
22
+
23
+ ---- ------------------ ----- ------ ------- ------- ------- ------- ------- ---------- ---------- ---------- ---------- -------
24
+ Task Source Total Failed F1@0.1 F1@0.3 F1@0.5 F1@0.7 F1 F1_evi@0.1 F1_evi@0.3 F1_evi@0.5 F1_evi@0.7 F1_evi
25
+ tvg charades_sta 500 0 0.792 0.67 0.518 0.252 0.558 0.82 0.674 0.46 0.206 0.54
26
+ tvg qvhighlights 500 0 0.862 0.818 0.616 0.292 0.647 0.864 0.76 0.504 0.23 0.5895
27
+ epm ego4d_nlq 500 29 0.284 0.156 0.092 0.044 0.144 0.29 0.148 0.086 0.048 0.143
28
+ tal perception_test 500 0 0.7076 0.3156 0.12911 0.04727 0.2999 0.02197 0.00305 0.00133 0.00133 0.00692
29
+ tal thumos14 146 0 0.715 0.38148 0.18891 0.09186 0.34431 0.29704 0.00907 0.00907 0.00411 0.07982
30
+ tal thumos15 292 1 0.71739 0.40306 0.20331 0.10337 0.35678 0.0678 0.02046 0.01275 0.0035 0.02613
31
+ evs summe 25 0 - - - - 0.33475 - - - - 0.33475
32
+ evs tvsum 50 1 - - - - 0.3683 - - - - 0.3683
33
+ vhd qvhighlights 500 0 - - - - 0.752 - - - - 0.738
34
+ vhd youtube_highlights 354 5 - - - - 0.61299 - - - - 0.62994
35
+ ---- ------------------ ----- ------ ------- ------- ------- ------- ------- ---------- ---------- ---------- ---------- -------
36
+ Mean F1: 0.4418
37
+ Mean F1_evi: 0.34564
38
+
39
+ Captioning
40
+
41
+ ---- ---------- ----- ------ ------- ------- ------- ------- ------- ---------- ---------- ---------- ---------- ------- ------- ------- ------- -------
42
+ Task Source Total Failed F1@0.1 F1@0.3 F1@0.5 F1@0.7 F1 F1_evi@0.1 F1_evi@0.3 F1_evi@0.5 F1_evi@0.7 F1_evi METEOR ROUGE_L CIDEr SentSim
43
+ dvc hirest 75 0 0.89316 0.69029 0.37502 0.14721 0.52642 0.88451 0.67243 0.34618 0.12297 0.50652 0.04655 0.08665 0.16912 0.24548
44
+ dvc youcook2 416 0 0.75255 0.56004 0.31417 0.1292 0.43899 0.76133 0.57368 0.309 0.12971 0.44343 0.05737 0.10113 0.18804 0.26356
45
+ slc cross_task 283 0 0.57332 0.30879 0.14523 0.05929 0.27166 0.56944 0.30997 0.1437 0.05647 0.2699 0.03701 0.05178 0.10211 0.19102
46
+ slc ht_step 394 0 0.51419 0.31028 0.162 0.06095 0.26185 0.51646 0.30004 0.15399 0.06558 0.25902 0.02383 0.03638 0.06348 0.17115
47
+ ---- ---------- ----- ------ ------- ------- ------- ------- ------- ---------- ---------- ---------- ---------- ------- ------- ------- ------- -------
48
+ Mean F1: 0.37473
49
+ Mean F1_evi: 0.36972
50
+ Mean SentSim: 0.2178
51
+
52
+ Complex
53
+
54
+ ---- --------------- ----- ------ ------- ------- ------- ------- --------- --------- --------- --------- ------- -------- ---
55
+ Task Source Total Failed R@0.1 R@0.3 R@0.5 R@0.7 R_evi@0.1 R_evi@0.3 R_evi@0.5 R_evi@0.7 mRec mRec_evi Acc
56
+ tem perception_test 500 0 0.84 0.496 0.158 0.06 0.86 0.462 0.136 0.054 0.3885 0.378 -
57
+ tem qvhighlights 464 0 0.50647 0.17888 0.07543 0.02155 0.5194 0.15733 0.0431 0.0194 0.19558 0.18481 -
58
+ gvq qa_ego4d 290 0 0.13448 0.08621 0.04828 0.01379 0.13793 0.08276 0.04483 0.01724 0.07069 0.07069 0.3
59
+ ---- --------------- ----- ------ ------- ------- ------- ------- --------- --------- --------- --------- ------- -------- ---
60
+ TEM Mean Rec: 0.29204
61
+ TEM Mean Rec_evi: 0.28141
62
+ GVQ Mean Rec: 0.07069
63
+ GVQ Mean Rec_evi: 0.07069
64
+
65
+ Overall
66
+
67
+ --------- --------- --------- -------- -------- -------- -------- -------- -------- --------- -------- --------- --------- ---------
68
+ RAR (Acc) EVC (Acc) RVQ (Acc) TVG (F1) EPM (F1) TAL (F1) EVS (F1) VHD (F1) DVC (F1) DVC (Sim) SLC (F1) SLC (Sim) TEM (Rec) GVQ (Rec)
69
+ 22.0 32.4 21.4 60.2 14.4 33.4 35.2 68.2 48.3 25.5 26.7 18.1 29.2 7.1
70
+ --------- --------- --------- -------- -------- -------- -------- -------- -------- --------- -------- --------- --------- ---------
71
+
72
+ Merged
73
+
74
+ --------- -------- -------- --------- ---------
75
+ Acc (ref) F1 (gnd) F1 (cap) Sim (cap) Rec (com)
76
+ 25.3 42.3 37.5 21.8 18.1
77
+ --------- -------- -------- --------- ---------
d2vlm/generation_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44f5cbbc5b84aabdce6c130f71cf4340a41525772a9f3b4c24b0b7b5f9758860
3
+ size 259
d2vlm/model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10665a4143fe5dcc7532650b4905b8571ce4e3c47f3662965e5d0249b12b3658
3
+ size 4972538576
d2vlm/model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d54851bb08ae53a0b2819ce59d65a1c0edc57034c72b8131c905f433eef0471
3
+ size 4834699624
d2vlm/model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4424555507cdd240d1b593d624c7454b2072325c43179763e97914ccf3b57ed6
3
+ size 234794112
d2vlm/model.safetensors.index.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa277d9a624b7bd549fd5debc939fd601c132212fbb2b46ed9a70dc6fecb15b3
3
+ size 104897
d2vlm/runs/Mar05_05-54-49_hopper-03/events.out.tfevents.1741125477.hopper-03.2084745.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eff374e91890ae69cb268824f8ae8e266cf59ac1cd12f594eac76ca1a06e147
3
+ size 966514
d2vlm/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d8ab02fd0c388e1368c0e23f0678ba72342b64bd39c6a102f4d0fe6e4229e19
3
+ size 885
d2vlm/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d20c63007592a0ace94858f0748e51d36b62b9af92873ffac271711d77d72d3
3
+ size 1844552
d2vlm/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
d2vlm/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d38dfbdb1ca8a44c18d0bf6e760d216c5b2b04c19650051ac6dbcbe6c7019088
3
+ size 3789
d2vlm/trainer_state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34448b2143ce01ec5a2c60cb9956b85f37660d5cf9e5da5cb8144e36bad4d04e
3
+ size 792222
d2vlm/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:625506e220b083b955f1eaeddb4a8aef2fbdc3e9fd2aeeda16802a0dec4a9e28
3
+ size 6840
d2vlm_mcqa_enhanced/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bea32e7248311c2903aa168994db8221c8e03b77f66d909312e20a3137dae8b8
3
+ size 330
d2vlm_mcqa_enhanced/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15231c47ebb160699c0445e7e367930e93e29280ac8e066c264eabcad2594873
3
+ size 1757
d2vlm_mcqa_enhanced/etbench/etbench_0.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6ebc6612ef47b422d96eca0acde73b3e3d9f1aa203a71837fe92604d053e1de
3
+ size 1779612
d2vlm_mcqa_enhanced/etbench/etbench_1.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20ac2c15d2718efab01ec2354bf54ab12ecd9c212f190062f121b3821b6b3bbb
3
+ size 1807841
d2vlm_mcqa_enhanced/etbench/etbench_2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e05215051087f490dd130c6e7a5f4062e23c3def2c0d98b03b3ca7b7dba9202
3
+ size 1802941
d2vlm_mcqa_enhanced/etbench/eval_results/metrics.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c954d10a498de2f9f2d1259259804a9083ad0ebd186bc54a065f121dda01556
3
+ size 7696
d2vlm_mcqa_enhanced/etbench/metrics2.log ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Total number of files: 3
2
+ Loading /hpctmp/e1374476/etbench/work_dirs_202505_v3_mcqa/with_mcqa_new_new_3/etbench/etbench_2.json...
3
+ Loading /hpctmp/e1374476/etbench/work_dirs_202505_v3_mcqa/with_mcqa_new_new_3/etbench/etbench_0.json...
4
+ Loading /hpctmp/e1374476/etbench/work_dirs_202505_v3_mcqa/with_mcqa_new_new_3/etbench/etbench_1.json...
5
+ Total number of samples: 7289
6
+ ==========================================
7
+
8
+ Referring
9
+
10
+ ---- --------------- ----- ------ -----
11
+ Task Source Total Failed Acc
12
+ rar perception_test 500 218 0.314
13
+ eca charades_sta 500 61 0.518
14
+ rvq star 500 0 0.316
15
+ ---- --------------- ----- ------ -----
16
+ Mean Acc: 0.38267
17
+
18
+ Grounding
19
+
20
+ ---- ------------------ ----- ------ ------- ------- ------- ------- ------- ---------- ---------- ---------- ---------- -------
21
+ Task Source Total Failed F1@0.1 F1@0.3 F1@0.5 F1@0.7 F1 F1_evi@0.1 F1_evi@0.3 F1_evi@0.5 F1_evi@0.7 F1_evi
22
+ tvg charades_sta 500 0 0.772 0.654 0.506 0.268 0.55 0.822 0.666 0.476 0.208 0.543
23
+ tvg qvhighlights 500 0 0.878 0.818 0.636 0.326 0.6645 0.88 0.792 0.546 0.268 0.6215
24
+ epm ego4d_nlq 500 2 0.342 0.202 0.128 0.072 0.186 0.338 0.2 0.13 0.066 0.1835
25
+ tal perception_test 500 0 0.68665 0.3104 0.12861 0.0452 0.29271 0.03631 0.00725 0.00067 0.0 0.01106
26
+ tal thumos14 146 0 0.72471 0.45248 0.26647 0.13045 0.39353 0.717 0.04375 0.01795 0.00337 0.19552
27
+ tal thumos15 292 0 0.7394 0.47117 0.28723 0.13821 0.409 0.14895 0.0564 0.05858 0.00654 0.06762
28
+ evs summe 25 0 - - - - 0.33995 - - - - 0.33995
29
+ evs tvsum 50 2 - - - - 0.39421 - - - - 0.39421
30
+ vhd qvhighlights 500 0 - - - - 0.788 - - - - 0.784
31
+ vhd youtube_highlights 354 1 - - - - 0.5904 - - - - 0.61864
32
+ ---- ------------------ ----- ------ ------- ------- ------- ------- ------- ---------- ---------- ---------- ---------- -------
33
+ Mean F1: 0.46083
34
+ Mean F1_evi: 0.3759
35
+
36
+ Captioning
37
+
38
+ ---- ---------- ----- ------ ------- ------- ------- ------- ------- ---------- ---------- ---------- ---------- ------- ------- ------- ------- -------
39
+ Task Source Total Failed F1@0.1 F1@0.3 F1@0.5 F1@0.7 F1 F1_evi@0.1 F1_evi@0.3 F1_evi@0.5 F1_evi@0.7 F1_evi METEOR ROUGE_L CIDEr SentSim
40
+ dvc hirest 75 0 0.86162 0.66773 0.36106 0.15242 0.51071 0.87867 0.66247 0.32752 0.10908 0.49443 0.03776 0.07414 0.12801 0.24272
41
+ dvc youcook2 416 0 0.74962 0.56173 0.29817 0.13441 0.43598 0.76738 0.57274 0.30428 0.13088 0.44382 0.05808 0.10111 0.20697 0.26529
42
+ slc cross_task 283 0 0.57813 0.32441 0.15422 0.05321 0.27749 0.58725 0.32261 0.1486 0.05584 0.27857 0.03543 0.0487 0.09534 0.18445
43
+ slc ht_step 394 0 0.51731 0.31613 0.16158 0.06471 0.26493 0.53247 0.32258 0.16604 0.05938 0.27012 0.02585 0.03857 0.07178 0.16471
44
+ ---- ---------- ----- ------ ------- ------- ------- ------- ------- ---------- ---------- ---------- ---------- ------- ------- ------- ------- -------
45
+ Mean F1: 0.37228
46
+ Mean F1_evi: 0.37174
47
+ Mean SentSim: 0.21429
48
+
49
+ Complex
50
+
51
+ ---- --------------- ----- ------ ------- ------- ------- ------- --------- --------- --------- --------- ------- -------- -------
52
+ Task Source Total Failed R@0.1 R@0.3 R@0.5 R@0.7 R_evi@0.1 R_evi@0.3 R_evi@0.5 R_evi@0.7 mRec mRec_evi Acc
53
+ tem perception_test 500 0 0.828 0.456 0.13 0.036 0.876 0.41 0.1 0.022 0.3625 0.352 -
54
+ tem qvhighlights 464 0 0.54741 0.19397 0.07328 0.02155 0.54957 0.17241 0.05388 0.02155 0.20905 0.19935 -
55
+ gvq qa_ego4d 290 1 0.16207 0.11379 0.05172 0.01724 0.14483 0.08621 0.04138 0.01379 0.08621 0.07155 0.44483
56
+ ---- --------------- ----- ------ ------- ------- ------- ------- --------- --------- --------- --------- ------- -------- -------
57
+ TEM Mean Rec: 0.28578
58
+ TEM Mean Rec_evi: 0.27568
59
+ GVQ Mean Rec: 0.08621
60
+ GVQ Mean Rec_evi: 0.07155
61
+
62
+ Overall
63
+
64
+ --------- --------- --------- -------- -------- -------- -------- -------- -------- --------- -------- --------- --------- ---------
65
+ RAR (Acc) EVC (Acc) RVQ (Acc) TVG (F1) EPM (F1) TAL (F1) EVS (F1) VHD (F1) DVC (F1) DVC (Sim) SLC (F1) SLC (Sim) TEM (Rec) GVQ (Rec)
66
+ 31.4 51.8 31.6 60.7 18.6 36.5 36.7 68.9 47.3 25.4 27.1 17.5 28.6 8.6
67
+ --------- --------- --------- -------- -------- -------- -------- -------- -------- --------- -------- --------- --------- ---------
68
+
69
+ Merged
70
+
71
+ --------- -------- -------- --------- ---------
72
+ Acc (ref) F1 (gnd) F1 (cap) Sim (cap) Rec (com)
73
+ 38.3 44.3 37.2 21.4 18.6
74
+ --------- -------- -------- --------- ---------
d2vlm_mcqa_enhanced/generation_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44f5cbbc5b84aabdce6c130f71cf4340a41525772a9f3b4c24b0b7b5f9758860
3
+ size 259
d2vlm_mcqa_enhanced/model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20506e33924d3c407e417fa1da60991fa0047ed11157a25a10c04e771bfe02ff
3
+ size 4972538576
d2vlm_mcqa_enhanced/model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12eb827497126eaaf1ec725fff60cf04dcc9ec61fe827da23d2884bc13b53019
3
+ size 4834699624
d2vlm_mcqa_enhanced/model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13d8153af0a8c41a499f123c36e620bc76418cc378115a06ad331fb7377c2888
3
+ size 234794112
d2vlm_mcqa_enhanced/model.safetensors.index.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa277d9a624b7bd549fd5debc939fd601c132212fbb2b46ed9a70dc6fecb15b3
3
+ size 104897
d2vlm_mcqa_enhanced/runs/May15_02-13-20_hopper-04/events.out.tfevents.1747246538.hopper-04.477597.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:750d34d09e5eb284d74c9bf21232f2a6f9ae8365971a0f0a247bbf486aeb35af
3
+ size 1843154
d2vlm_mcqa_enhanced/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d8ab02fd0c388e1368c0e23f0678ba72342b64bd39c6a102f4d0fe6e4229e19
3
+ size 885
d2vlm_mcqa_enhanced/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d20c63007592a0ace94858f0748e51d36b62b9af92873ffac271711d77d72d3
3
+ size 1844552
d2vlm_mcqa_enhanced/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
d2vlm_mcqa_enhanced/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d38dfbdb1ca8a44c18d0bf6e760d216c5b2b04c19650051ac6dbcbe6c7019088
3
+ size 3789
d2vlm_mcqa_enhanced/trainer_state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b30ae11ca7737c6f5cbd8639790ffa1cf1008d1d480a24154c4db1a84f8ad25
3
+ size 1514456
d2vlm_mcqa_enhanced/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74b9137325bb91129167eab7021590a181417b4117966ec8f2e64196653a8076
3
+ size 6776