baohuynhbk14
/

miniCPM_finetune_lora_viet_vqa

@@ -14,6 +14,8 @@ should probably proofread and complete it, then remove this comment. -->
 # miniCPM_finetune_lora_viet_vqa
 This model is a fine-tuned version of [openbmb/MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6) on an unknown dataset.
 ## Model description
@@ -44,10 +46,15 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.95) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.01
-- num_epochs: 2.0
 ### Training results
 ### Framework versions

 # miniCPM_finetune_lora_viet_vqa
 This model is a fine-tuned version of [openbmb/MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.6850
 ## Model description
 - optimizer: Adam with betas=(0.9,0.95) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.01
+- num_epochs: 5.0
 ### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 2.1566        | 1.3889 | 100  | 2.0881          |
+| 1.8447        | 2.7778 | 200  | 1.8452          |
+| 1.7103        | 4.1667 | 300  | 1.6850          |
 ### Framework versions

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a93cdb910600fe2b82d0ab66ee95a2a8021cbdf41aa8c903620c4ff95904923
 size 2140797224

 version https://git-lfs.github.com/spec/v1
+oid sha256:80e34b9592f57d69fb2c668c84a46ee56010ad58879663e16e882641d36c912e
 size 2140797224

trainer_state.json CHANGED Viewed

@@ -1,225 +1,557 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.0,
-  "eval_steps": 1000,
-  "global_step": 144,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.06944444444444445,
-      "grad_norm": 8.71909236907959,
       "learning_rate": 1e-06,
-      "loss": 2.6593,
       "step": 5
     },
     {
       "epoch": 0.1388888888888889,
-      "grad_norm": 8.853459358215332,
       "learning_rate": 1e-06,
-      "loss": 2.4766,
       "step": 10
     },
     {
       "epoch": 0.20833333333333334,
-      "grad_norm": 6.577568054199219,
       "learning_rate": 1e-06,
-      "loss": 2.495,
       "step": 15
     },
     {
       "epoch": 0.2777777777777778,
-      "grad_norm": 8.3203706741333,
       "learning_rate": 1e-06,
-      "loss": 2.6105,
       "step": 20
     },
     {
       "epoch": 0.3472222222222222,
-      "grad_norm": 7.732483863830566,
       "learning_rate": 1e-06,
-      "loss": 2.4744,
       "step": 25
     },
     {
       "epoch": 0.4166666666666667,
-      "grad_norm": 6.6942548751831055,
       "learning_rate": 1e-06,
-      "loss": 2.5163,
       "step": 30
     },
     {
       "epoch": 0.4861111111111111,
-      "grad_norm": 5.781284809112549,
       "learning_rate": 1e-06,
-      "loss": 2.3854,
       "step": 35
     },
     {
       "epoch": 0.5555555555555556,
-      "grad_norm": 7.311328887939453,
       "learning_rate": 1e-06,
-      "loss": 2.4442,
       "step": 40
     },
     {
       "epoch": 0.625,
-      "grad_norm": 6.254249572753906,
       "learning_rate": 1e-06,
-      "loss": 2.2468,
       "step": 45
     },
     {
       "epoch": 0.6944444444444444,
-      "grad_norm": 7.6778669357299805,
       "learning_rate": 1e-06,
-      "loss": 2.2565,
       "step": 50
     },
     {
       "epoch": 0.7638888888888888,
-      "grad_norm": 7.495645523071289,
       "learning_rate": 1e-06,
-      "loss": 2.4391,
       "step": 55
     },
     {
       "epoch": 0.8333333333333334,
-      "grad_norm": 5.458991527557373,
       "learning_rate": 1e-06,
-      "loss": 2.2362,
       "step": 60
     },
     {
       "epoch": 0.9027777777777778,
-      "grad_norm": 5.659170627593994,
       "learning_rate": 1e-06,
-      "loss": 2.2188,
       "step": 65
     },
     {
       "epoch": 0.9722222222222222,
-      "grad_norm": 4.8217997550964355,
       "learning_rate": 1e-06,
-      "loss": 2.154,
       "step": 70
     },
     {
       "epoch": 1.0416666666666667,
-      "grad_norm": 6.693627834320068,
       "learning_rate": 1e-06,
-      "loss": 2.1182,
       "step": 75
     },
     {
       "epoch": 1.1111111111111112,
-      "grad_norm": 6.67255973815918,
       "learning_rate": 1e-06,
-      "loss": 2.1879,
       "step": 80
     },
     {
       "epoch": 1.1805555555555556,
-      "grad_norm": 4.831326007843018,
       "learning_rate": 1e-06,
-      "loss": 2.1077,
       "step": 85
     },
     {
       "epoch": 1.25,
-      "grad_norm": 4.830414295196533,
       "learning_rate": 1e-06,
-      "loss": 2.026,
       "step": 90
     },
     {
       "epoch": 1.3194444444444444,
-      "grad_norm": 5.039080619812012,
       "learning_rate": 1e-06,
-      "loss": 2.0585,
       "step": 95
     },
     {
       "epoch": 1.3888888888888888,
-      "grad_norm": 5.749475002288818,
       "learning_rate": 1e-06,
-      "loss": 2.0486,
       "step": 100
     },
     {
       "epoch": 1.4583333333333333,
-      "grad_norm": 5.0571770668029785,
       "learning_rate": 1e-06,
-      "loss": 1.9616,
       "step": 105
     },
     {
       "epoch": 1.5277777777777777,
-      "grad_norm": 4.597809314727783,
       "learning_rate": 1e-06,
-      "loss": 1.9063,
       "step": 110
     },
     {
       "epoch": 1.5972222222222223,
-      "grad_norm": 5.453224182128906,
       "learning_rate": 1e-06,
-      "loss": 2.1802,
       "step": 115
     },
     {
       "epoch": 1.6666666666666665,
-      "grad_norm": 4.519564628601074,
       "learning_rate": 1e-06,
-      "loss": 2.2049,
       "step": 120
     },
     {
       "epoch": 1.7361111111111112,
-      "grad_norm": 4.976806163787842,
       "learning_rate": 1e-06,
-      "loss": 1.8881,
       "step": 125
     },
     {
       "epoch": 1.8055555555555556,
-      "grad_norm": 4.543058395385742,
       "learning_rate": 1e-06,
-      "loss": 2.0673,
       "step": 130
     },
     {
       "epoch": 1.875,
-      "grad_norm": 4.89597225189209,
       "learning_rate": 1e-06,
-      "loss": 2.0158,
       "step": 135
     },
     {
       "epoch": 1.9444444444444444,
-      "grad_norm": 4.95186185836792,
       "learning_rate": 1e-06,
-      "loss": 2.0052,
       "step": 140
     },
     {
-      "epoch": 2.0,
-      "step": 144,
-      "total_flos": 9.601343958286336e+16,
-      "train_loss": 2.2213990655210285,
-      "train_runtime": 2501.8045,
-      "train_samples_per_second": 0.921,
-      "train_steps_per_second": 0.058
     }
   ],
   "logging_steps": 5,
-  "max_steps": 144,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 2,
-  "save_steps": 200,
-  "total_flos": 9.601343958286336e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 100,
+  "global_step": 360,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.06944444444444445,
+      "grad_norm": 6.3636603355407715,
       "learning_rate": 1e-06,
+      "loss": 2.6727,
       "step": 5
     },
     {
       "epoch": 0.1388888888888889,
+      "grad_norm": 7.486879825592041,
       "learning_rate": 1e-06,
+      "loss": 2.3642,
       "step": 10
     },
     {
       "epoch": 0.20833333333333334,
+      "grad_norm": 6.5991997718811035,
       "learning_rate": 1e-06,
+      "loss": 2.515,
       "step": 15
     },
     {
       "epoch": 0.2777777777777778,
+      "grad_norm": 7.580630779266357,
       "learning_rate": 1e-06,
+      "loss": 2.3997,
       "step": 20
     },
     {
       "epoch": 0.3472222222222222,
+      "grad_norm": 7.5727410316467285,
       "learning_rate": 1e-06,
+      "loss": 2.4551,
       "step": 25
     },
     {
       "epoch": 0.4166666666666667,
+      "grad_norm": 8.835946083068848,
       "learning_rate": 1e-06,
+      "loss": 2.4476,
       "step": 30
     },
     {
       "epoch": 0.4861111111111111,
+      "grad_norm": 7.495606899261475,
       "learning_rate": 1e-06,
+      "loss": 2.4191,
       "step": 35
     },
     {
       "epoch": 0.5555555555555556,
+      "grad_norm": 8.057035446166992,
       "learning_rate": 1e-06,
+      "loss": 2.441,
       "step": 40
     },
     {
       "epoch": 0.625,
+      "grad_norm": 6.828744411468506,
       "learning_rate": 1e-06,
+      "loss": 2.3052,
       "step": 45
     },
     {
       "epoch": 0.6944444444444444,
+      "grad_norm": 7.163251876831055,
       "learning_rate": 1e-06,
+      "loss": 2.1357,
       "step": 50
     },
     {
       "epoch": 0.7638888888888888,
+      "grad_norm": 5.414941787719727,
       "learning_rate": 1e-06,
+      "loss": 2.2248,
       "step": 55
     },
     {
       "epoch": 0.8333333333333334,
+      "grad_norm": 6.0801544189453125,
       "learning_rate": 1e-06,
+      "loss": 2.2934,
       "step": 60
     },
     {
       "epoch": 0.9027777777777778,
+      "grad_norm": 6.054081439971924,
       "learning_rate": 1e-06,
+      "loss": 2.3014,
       "step": 65
     },
     {
       "epoch": 0.9722222222222222,
+      "grad_norm": 5.827741622924805,
       "learning_rate": 1e-06,
+      "loss": 2.2515,
       "step": 70
     },
     {
       "epoch": 1.0416666666666667,
+      "grad_norm": 3.5676162242889404,
       "learning_rate": 1e-06,
+      "loss": 2.0915,
       "step": 75
     },
     {
       "epoch": 1.1111111111111112,
+      "grad_norm": 5.15900993347168,
       "learning_rate": 1e-06,
+      "loss": 2.0749,
       "step": 80
     },
     {
       "epoch": 1.1805555555555556,
+      "grad_norm": 5.206437110900879,
       "learning_rate": 1e-06,
+      "loss": 2.0539,
       "step": 85
     },
     {
       "epoch": 1.25,
+      "grad_norm": 5.990969657897949,
       "learning_rate": 1e-06,
+      "loss": 2.1308,
       "step": 90
     },
     {
       "epoch": 1.3194444444444444,
+      "grad_norm": 6.198008060455322,
       "learning_rate": 1e-06,
+      "loss": 2.3256,
       "step": 95
     },
     {
       "epoch": 1.3888888888888888,
+      "grad_norm": 5.184628486633301,
       "learning_rate": 1e-06,
+      "loss": 2.1566,
+      "step": 100
+    },
+    {
+      "epoch": 1.3888888888888888,
+      "eval_loss": 2.0880796909332275,
+      "eval_runtime": 34.0667,
+      "eval_samples_per_second": 2.935,
+      "eval_steps_per_second": 0.734,
       "step": 100
     },
     {
       "epoch": 1.4583333333333333,
+      "grad_norm": 5.412724494934082,
       "learning_rate": 1e-06,
+      "loss": 1.9085,
       "step": 105
     },
     {
       "epoch": 1.5277777777777777,
+      "grad_norm": 3.459959030151367,
       "learning_rate": 1e-06,
+      "loss": 1.9494,
       "step": 110
     },
     {
       "epoch": 1.5972222222222223,
+      "grad_norm": 5.159445762634277,
       "learning_rate": 1e-06,
+      "loss": 1.9334,
       "step": 115
     },
     {
       "epoch": 1.6666666666666665,
+      "grad_norm": 5.133082389831543,
       "learning_rate": 1e-06,
+      "loss": 2.0826,
       "step": 120
     },
     {
       "epoch": 1.7361111111111112,
+      "grad_norm": 4.473026752471924,
       "learning_rate": 1e-06,
+      "loss": 2.0585,
       "step": 125
     },
     {
       "epoch": 1.8055555555555556,
+      "grad_norm": 5.063863754272461,
       "learning_rate": 1e-06,
+      "loss": 2.1289,
       "step": 130
     },
     {
       "epoch": 1.875,
+      "grad_norm": 4.927737236022949,
       "learning_rate": 1e-06,
+      "loss": 1.9872,
       "step": 135
     },
     {
       "epoch": 1.9444444444444444,
+      "grad_norm": 5.563902854919434,
       "learning_rate": 1e-06,
+      "loss": 1.9803,
       "step": 140
     },
     {
+      "epoch": 2.013888888888889,
+      "grad_norm": 3.901442050933838,
+      "learning_rate": 1e-06,
+      "loss": 1.8309,
+      "step": 145
+    },
+    {
+      "epoch": 2.0833333333333335,
+      "grad_norm": 3.771136999130249,
+      "learning_rate": 1e-06,
+      "loss": 1.7758,
+      "step": 150
+    },
+    {
+      "epoch": 2.1527777777777777,
+      "grad_norm": 4.6159257888793945,
+      "learning_rate": 1e-06,
+      "loss": 1.9193,
+      "step": 155
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 3.758843183517456,
+      "learning_rate": 1e-06,
+      "loss": 1.9329,
+      "step": 160
+    },
+    {
+      "epoch": 2.2916666666666665,
+      "grad_norm": 4.267579078674316,
+      "learning_rate": 1e-06,
+      "loss": 2.0399,
+      "step": 165
+    },
+    {
+      "epoch": 2.361111111111111,
+      "grad_norm": 3.9819560050964355,
+      "learning_rate": 1e-06,
+      "loss": 1.9568,
+      "step": 170
+    },
+    {
+      "epoch": 2.4305555555555554,
+      "grad_norm": 3.8918192386627197,
+      "learning_rate": 1e-06,
+      "loss": 1.7377,
+      "step": 175
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 3.9746928215026855,
+      "learning_rate": 1e-06,
+      "loss": 1.8949,
+      "step": 180
+    },
+    {
+      "epoch": 2.5694444444444446,
+      "grad_norm": 3.328784704208374,
+      "learning_rate": 1e-06,
+      "loss": 1.6509,
+      "step": 185
+    },
+    {
+      "epoch": 2.638888888888889,
+      "grad_norm": 3.835324287414551,
+      "learning_rate": 1e-06,
+      "loss": 1.8321,
+      "step": 190
+    },
+    {
+      "epoch": 2.7083333333333335,
+      "grad_norm": 3.3603885173797607,
+      "learning_rate": 1e-06,
+      "loss": 1.8628,
+      "step": 195
+    },
+    {
+      "epoch": 2.7777777777777777,
+      "grad_norm": 3.7577502727508545,
+      "learning_rate": 1e-06,
+      "loss": 1.8447,
+      "step": 200
+    },
+    {
+      "epoch": 2.7777777777777777,
+      "eval_loss": 1.8452154397964478,
+      "eval_runtime": 34.0911,
+      "eval_samples_per_second": 2.933,
+      "eval_steps_per_second": 0.733,
+      "step": 200
+    },
+    {
+      "epoch": 2.8472222222222223,
+      "grad_norm": 4.379385948181152,
+      "learning_rate": 1e-06,
+      "loss": 1.8212,
+      "step": 205
+    },
+    {
+      "epoch": 2.9166666666666665,
+      "grad_norm": 3.7095022201538086,
+      "learning_rate": 1e-06,
+      "loss": 1.7862,
+      "step": 210
+    },
+    {
+      "epoch": 2.986111111111111,
+      "grad_norm": 4.164438724517822,
+      "learning_rate": 1e-06,
+      "loss": 1.8046,
+      "step": 215
+    },
+    {
+      "epoch": 3.0555555555555554,
+      "grad_norm": 3.6749582290649414,
+      "learning_rate": 1e-06,
+      "loss": 1.6358,
+      "step": 220
+    },
+    {
+      "epoch": 3.125,
+      "grad_norm": 3.7247958183288574,
+      "learning_rate": 1e-06,
+      "loss": 1.791,
+      "step": 225
+    },
+    {
+      "epoch": 3.1944444444444446,
+      "grad_norm": 2.9533472061157227,
+      "learning_rate": 1e-06,
+      "loss": 1.6251,
+      "step": 230
+    },
+    {
+      "epoch": 3.263888888888889,
+      "grad_norm": 4.062502384185791,
+      "learning_rate": 1e-06,
+      "loss": 1.6976,
+      "step": 235
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 4.328882217407227,
+      "learning_rate": 1e-06,
+      "loss": 1.8438,
+      "step": 240
+    },
+    {
+      "epoch": 3.4027777777777777,
+      "grad_norm": 4.158596038818359,
+      "learning_rate": 1e-06,
+      "loss": 1.8998,
+      "step": 245
+    },
+    {
+      "epoch": 3.4722222222222223,
+      "grad_norm": 5.7752556800842285,
+      "learning_rate": 1e-06,
+      "loss": 1.7517,
+      "step": 250
+    },
+    {
+      "epoch": 3.5416666666666665,
+      "grad_norm": 4.568635940551758,
+      "learning_rate": 1e-06,
+      "loss": 1.6835,
+      "step": 255
+    },
+    {
+      "epoch": 3.611111111111111,
+      "grad_norm": 3.6611974239349365,
+      "learning_rate": 1e-06,
+      "loss": 1.7852,
+      "step": 260
+    },
+    {
+      "epoch": 3.6805555555555554,
+      "grad_norm": 4.026912212371826,
+      "learning_rate": 1e-06,
+      "loss": 1.7916,
+      "step": 265
+    },
+    {
+      "epoch": 3.75,
+      "grad_norm": 4.750195026397705,
+      "learning_rate": 1e-06,
+      "loss": 1.7584,
+      "step": 270
+    },
+    {
+      "epoch": 3.8194444444444446,
+      "grad_norm": 3.936798572540283,
+      "learning_rate": 1e-06,
+      "loss": 1.5877,
+      "step": 275
+    },
+    {
+      "epoch": 3.888888888888889,
+      "grad_norm": 4.1127800941467285,
+      "learning_rate": 1e-06,
+      "loss": 1.5392,
+      "step": 280
+    },
+    {
+      "epoch": 3.9583333333333335,
+      "grad_norm": 3.6437580585479736,
+      "learning_rate": 1e-06,
+      "loss": 1.6125,
+      "step": 285
+    },
+    {
+      "epoch": 4.027777777777778,
+      "grad_norm": 3.641177177429199,
+      "learning_rate": 1e-06,
+      "loss": 1.687,
+      "step": 290
+    },
+    {
+      "epoch": 4.097222222222222,
+      "grad_norm": 3.797327995300293,
+      "learning_rate": 1e-06,
+      "loss": 1.7779,
+      "step": 295
+    },
+    {
+      "epoch": 4.166666666666667,
+      "grad_norm": 5.071943283081055,
+      "learning_rate": 1e-06,
+      "loss": 1.7103,
+      "step": 300
+    },
+    {
+      "epoch": 4.166666666666667,
+      "eval_loss": 1.6850143671035767,
+      "eval_runtime": 34.4694,
+      "eval_samples_per_second": 2.901,
+      "eval_steps_per_second": 0.725,
+      "step": 300
+    },
+    {
+      "epoch": 4.236111111111111,
+      "grad_norm": 6.09140682220459,
+      "learning_rate": 1e-06,
+      "loss": 1.6347,
+      "step": 305
+    },
+    {
+      "epoch": 4.305555555555555,
+      "grad_norm": 5.452902317047119,
+      "learning_rate": 1e-06,
+      "loss": 1.7689,
+      "step": 310
+    },
+    {
+      "epoch": 4.375,
+      "grad_norm": 3.5834009647369385,
+      "learning_rate": 1e-06,
+      "loss": 1.6514,
+      "step": 315
+    },
+    {
+      "epoch": 4.444444444444445,
+      "grad_norm": 3.288220167160034,
+      "learning_rate": 1e-06,
+      "loss": 1.4941,
+      "step": 320
+    },
+    {
+      "epoch": 4.513888888888889,
+      "grad_norm": 4.202756404876709,
+      "learning_rate": 1e-06,
+      "loss": 1.5374,
+      "step": 325
+    },
+    {
+      "epoch": 4.583333333333333,
+      "grad_norm": 3.9757556915283203,
+      "learning_rate": 1e-06,
+      "loss": 1.6289,
+      "step": 330
+    },
+    {
+      "epoch": 4.652777777777778,
+      "grad_norm": 3.3575947284698486,
+      "learning_rate": 1e-06,
+      "loss": 1.5446,
+      "step": 335
+    },
+    {
+      "epoch": 4.722222222222222,
+      "grad_norm": 4.207667350769043,
+      "learning_rate": 1e-06,
+      "loss": 1.5668,
+      "step": 340
+    },
+    {
+      "epoch": 4.791666666666667,
+      "grad_norm": 3.2263221740722656,
+      "learning_rate": 1e-06,
+      "loss": 1.4529,
+      "step": 345
+    },
+    {
+      "epoch": 4.861111111111111,
+      "grad_norm": 3.272395610809326,
+      "learning_rate": 1e-06,
+      "loss": 1.5215,
+      "step": 350
+    },
+    {
+      "epoch": 4.930555555555555,
+      "grad_norm": 3.4315106868743896,
+      "learning_rate": 1e-06,
+      "loss": 1.5781,
+      "step": 355
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 3.9581406116485596,
+      "learning_rate": 1e-06,
+      "loss": 1.5001,
+      "step": 360
+    },
+    {
+      "epoch": 5.0,
+      "step": 360,
+      "total_flos": 2.3797808143060173e+17,
+      "train_loss": 1.9143991947174073,
+      "train_runtime": 6464.4185,
+      "train_samples_per_second": 0.891,
+      "train_steps_per_second": 0.056
     }
   ],
   "logging_steps": 5,
+  "max_steps": 360,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 100,
+  "total_flos": 2.3797808143060173e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null