{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 478,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04184100418410042,
      "grad_norm": 0.2925145155398498,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.0243,
      "step": 10
    },
    {
      "epoch": 0.08368200836820083,
      "grad_norm": 0.09332366083077137,
      "learning_rate": 7.916666666666667e-06,
      "loss": 0.0066,
      "step": 20
    },
    {
      "epoch": 0.12552301255230125,
      "grad_norm": 0.037000329123698675,
      "learning_rate": 1.2083333333333333e-05,
      "loss": 0.0044,
      "step": 30
    },
    {
      "epoch": 0.16736401673640167,
      "grad_norm": 0.024975645672741548,
      "learning_rate": 1.6250000000000002e-05,
      "loss": 0.0028,
      "step": 40
    },
    {
      "epoch": 0.20920502092050208,
      "grad_norm": 0.033927305503936314,
      "learning_rate": 1.9999733110857237e-05,
      "loss": 0.0025,
      "step": 50
    },
    {
      "epoch": 0.2510460251046025,
      "grad_norm": 0.014612909987451435,
      "learning_rate": 1.9967723647752463e-05,
      "loss": 0.0018,
      "step": 60
    },
    {
      "epoch": 0.2928870292887029,
      "grad_norm": 0.017483510752084472,
      "learning_rate": 1.988253206622306e-05,
      "loss": 0.0019,
      "step": 70
    },
    {
      "epoch": 0.33472803347280333,
      "grad_norm": 0.014386718401807688,
      "learning_rate": 1.9744612900216588e-05,
      "loss": 0.0026,
      "step": 80
    },
    {
      "epoch": 0.37656903765690375,
      "grad_norm": 0.039483682416672744,
      "learning_rate": 1.9554702008157567e-05,
      "loss": 0.0017,
      "step": 90
    },
    {
      "epoch": 0.41841004184100417,
      "grad_norm": 0.017654984549017917,
      "learning_rate": 1.9313812646824432e-05,
      "loss": 0.0013,
      "step": 100
    },
    {
      "epoch": 0.4602510460251046,
      "grad_norm": 0.00968661778293667,
      "learning_rate": 1.9023230065186192e-05,
      "loss": 0.0031,
      "step": 110
    },
    {
      "epoch": 0.502092050209205,
      "grad_norm": 0.023973932216176105,
      "learning_rate": 1.8684504647043093e-05,
      "loss": 0.0022,
      "step": 120
    },
    {
      "epoch": 0.5439330543933054,
      "grad_norm": 0.021080771265352845,
      "learning_rate": 1.8299443639058238e-05,
      "loss": 0.0024,
      "step": 130
    },
    {
      "epoch": 0.5857740585774058,
      "grad_norm": 0.02233998245288774,
      "learning_rate": 1.7870101508314686e-05,
      "loss": 0.0008,
      "step": 140
    },
    {
      "epoch": 0.6276150627615062,
      "grad_norm": 0.015428869613923153,
      "learning_rate": 1.7398768980844664e-05,
      "loss": 0.002,
      "step": 150
    },
    {
      "epoch": 0.6694560669456067,
      "grad_norm": 0.011099263805834297,
      "learning_rate": 1.6887960819615025e-05,
      "loss": 0.0011,
      "step": 160
    },
    {
      "epoch": 0.7112970711297071,
      "grad_norm": 0.015474457950600984,
      "learning_rate": 1.634040240717878e-05,
      "loss": 0.0008,
      "step": 170
    },
    {
      "epoch": 0.7531380753138075,
      "grad_norm": 0.016896916614250686,
      "learning_rate": 1.5759015204579958e-05,
      "loss": 0.0012,
      "step": 180
    },
    {
      "epoch": 0.7949790794979079,
      "grad_norm": 0.010764154560653242,
      "learning_rate": 1.5146901164094914e-05,
      "loss": 0.0013,
      "step": 190
    },
    {
      "epoch": 0.8368200836820083,
      "grad_norm": 0.02624812740713589,
      "learning_rate": 1.4507326178974789e-05,
      "loss": 0.0021,
      "step": 200
    },
    {
      "epoch": 0.8786610878661087,
      "grad_norm": 0.02213838592470387,
      "learning_rate": 1.3843702658491961e-05,
      "loss": 0.0014,
      "step": 210
    },
    {
      "epoch": 0.9205020920502092,
      "grad_norm": 0.03256425649897184,
      "learning_rate": 1.3159571321260114e-05,
      "loss": 0.0007,
      "step": 220
    },
    {
      "epoch": 0.9623430962343096,
      "grad_norm": 0.008534054334232773,
      "learning_rate": 1.2458582303968466e-05,
      "loss": 0.0011,
      "step": 230
    },
    {
      "epoch": 1.00418410041841,
      "grad_norm": 0.015659822283413834,
      "learning_rate": 1.1744475686323225e-05,
      "loss": 0.0018,
      "step": 240
    },
    {
      "epoch": 1.0460251046025104,
      "grad_norm": 0.009318785906390915,
      "learning_rate": 1.1021061536104093e-05,
      "loss": 0.0007,
      "step": 250
    },
    {
      "epoch": 1.0878661087866108,
      "grad_norm": 0.02012715152590903,
      "learning_rate": 1.02921995808042e-05,
      "loss": 0.0008,
      "step": 260
    },
    {
      "epoch": 1.1297071129707112,
      "grad_norm": 0.01535009737960017,
      "learning_rate": 9.561778614313876e-06,
      "loss": 0.0005,
      "step": 270
    },
    {
      "epoch": 1.1715481171548117,
      "grad_norm": 0.011963284930504645,
      "learning_rate": 8.833695748522702e-06,
      "loss": 0.0011,
      "step": 280
    },
    {
      "epoch": 1.213389121338912,
      "grad_norm": 0.01174691491766897,
      "learning_rate": 8.111835620541397e-06,
      "loss": 0.0008,
      "step": 290
    },
    {
      "epoch": 1.2552301255230125,
      "grad_norm": 0.015140100128090778,
      "learning_rate": 7.400049666482061e-06,
      "loss": 0.0005,
      "step": 300
    },
    {
      "epoch": 1.297071129707113,
      "grad_norm": 0.0082059300361877,
      "learning_rate": 6.702135572380078e-06,
      "loss": 0.001,
      "step": 310
    },
    {
      "epoch": 1.3389121338912133,
      "grad_norm": 0.004079307018079572,
      "learning_rate": 6.021817011896004e-06,
      "loss": 0.0014,
      "step": 320
    },
    {
      "epoch": 1.3807531380753137,
      "grad_norm": 0.01109473064765619,
      "learning_rate": 5.362723778905427e-06,
      "loss": 0.001,
      "step": 330
    },
    {
      "epoch": 1.4225941422594142,
      "grad_norm": 0.0015855550385638771,
      "learning_rate": 4.728372420978119e-06,
      "loss": 0.0012,
      "step": 340
    },
    {
      "epoch": 1.4644351464435146,
      "grad_norm": 0.006116117187679222,
      "learning_rate": 4.12214747707527e-06,
      "loss": 0.0014,
      "step": 350
    },
    {
      "epoch": 1.506276150627615,
      "grad_norm": 0.020785630071871383,
      "learning_rate": 3.5472834195697017e-06,
      "loss": 0.0005,
      "step": 360
    },
    {
      "epoch": 1.5481171548117154,
      "grad_norm": 0.01356591109411228,
      "learning_rate": 3.0068473969362998e-06,
      "loss": 0.0016,
      "step": 370
    },
    {
      "epoch": 1.5899581589958158,
      "grad_norm": 0.0455348358926559,
      "learning_rate": 2.5037228691878424e-06,
      "loss": 0.0011,
      "step": 380
    },
    {
      "epoch": 1.6317991631799162,
      "grad_norm": 0.010270285275849788,
      "learning_rate": 2.0405942233682017e-06,
      "loss": 0.0011,
      "step": 390
    },
    {
      "epoch": 1.6736401673640167,
      "grad_norm": 0.009898474319911012,
      "learning_rate": 1.619932451186048e-06,
      "loss": 0.0019,
      "step": 400
    },
    {
      "epoch": 1.715481171548117,
      "grad_norm": 0.025182308499786255,
      "learning_rate": 1.2439819652049178e-06,
      "loss": 0.0008,
      "step": 410
    },
    {
      "epoch": 1.7573221757322175,
      "grad_norm": 0.01013827313449354,
      "learning_rate": 9.147486239311032e-07,
      "loss": 0.001,
      "step": 420
    },
    {
      "epoch": 1.799163179916318,
      "grad_norm": 0.03137196761877556,
      "learning_rate": 6.339890296906493e-07,
      "loss": 0.0013,
      "step": 430
    },
    {
      "epoch": 1.8410041841004183,
      "grad_norm": 0.016893363525461946,
      "learning_rate": 4.032011563958893e-07,
      "loss": 0.0014,
      "step": 440
    },
    {
      "epoch": 1.8828451882845187,
      "grad_norm": 0.015987721995170366,
      "learning_rate": 2.2361635720651199e-07,
      "loss": 0.0012,
      "step": 450
    },
    {
      "epoch": 1.9246861924686192,
      "grad_norm": 0.01083855584874376,
      "learning_rate": 9.619279472766863e-08,
      "loss": 0.0008,
      "step": 460
    },
    {
      "epoch": 1.9665271966527196,
      "grad_norm": 0.009804991431603429,
      "learning_rate": 2.1610328797904145e-08,
      "loss": 0.002,
      "step": 470
    },
    {
      "epoch": 2.0,
      "step": 478,
      "total_flos": 506334263902208.0,
      "train_loss": 0.002037838656673496,
      "train_runtime": 19073.3023,
      "train_samples_per_second": 6.413,
      "train_steps_per_second": 0.025
    }
  ],
  "logging_steps": 10,
  "max_steps": 478,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 506334263902208.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}