| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 566, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0088339222614841, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 8.620689655172414e-06, | |
| "loss": 0.145, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0176678445229682, | |
| "grad_norm": 0.1845703125, | |
| "learning_rate": 1.7241379310344828e-05, | |
| "loss": 0.1351, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.026501766784452298, | |
| "grad_norm": 0.1513671875, | |
| "learning_rate": 2.5862068965517244e-05, | |
| "loss": 0.1249, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0353356890459364, | |
| "grad_norm": 0.12109375, | |
| "learning_rate": 3.4482758620689657e-05, | |
| "loss": 0.1189, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.044169611307420496, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 4.3103448275862066e-05, | |
| "loss": 0.1181, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.053003533568904596, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 4.999961496300632e-05, | |
| "loss": 0.1127, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.061837455830388695, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 4.9986140051876094e-05, | |
| "loss": 0.1102, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0706713780918728, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 4.995342646712217e-05, | |
| "loss": 0.1093, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.07950530035335689, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 4.9901502197807084e-05, | |
| "loss": 0.109, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.08833922261484099, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 4.9830411669255416e-05, | |
| "loss": 0.107, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09717314487632508, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 4.974021570504443e-05, | |
| "loss": 0.1041, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.10600706713780919, | |
| "grad_norm": 0.07421875, | |
| "learning_rate": 4.963099147496465e-05, | |
| "loss": 0.1029, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.11484098939929328, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 4.9502832428995005e-05, | |
| "loss": 0.1047, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.12367491166077739, | |
| "grad_norm": 0.0771484375, | |
| "learning_rate": 4.935584821734901e-05, | |
| "loss": 0.1044, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.13250883392226148, | |
| "grad_norm": 0.07568359375, | |
| "learning_rate": 4.919016459666026e-05, | |
| "loss": 0.1063, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.1413427561837456, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 4.9005923322387706e-05, | |
| "loss": 0.1025, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1501766784452297, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 4.880328202753264e-05, | |
| "loss": 0.1022, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.15901060070671377, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 4.858241408777117e-05, | |
| "loss": 0.1031, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.16784452296819788, | |
| "grad_norm": 0.07421875, | |
| "learning_rate": 4.834350847311758e-05, | |
| "loss": 0.1016, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.17667844522968199, | |
| "grad_norm": 0.076171875, | |
| "learning_rate": 4.8086769586245554e-05, | |
| "loss": 0.1008, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1855123674911661, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 4.7812417087605456e-05, | |
| "loss": 0.1043, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.19434628975265017, | |
| "grad_norm": 0.07275390625, | |
| "learning_rate": 4.752068570748746e-05, | |
| "loss": 0.1038, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.20318021201413428, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 4.721182504519118e-05, | |
| "loss": 0.1014, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.21201413427561838, | |
| "grad_norm": 0.0712890625, | |
| "learning_rate": 4.688609935547371e-05, | |
| "loss": 0.1004, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.22084805653710246, | |
| "grad_norm": 0.07177734375, | |
| "learning_rate": 4.654378732245869e-05, | |
| "loss": 0.1017, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.22968197879858657, | |
| "grad_norm": 0.07421875, | |
| "learning_rate": 4.618518182120011e-05, | |
| "loss": 0.1006, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.23851590106007067, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 4.5810589667104347e-05, | |
| "loss": 0.1008, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.24734982332155478, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 4.542033135342537e-05, | |
| "loss": 0.1012, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.25618374558303886, | |
| "grad_norm": 0.07275390625, | |
| "learning_rate": 4.5014740777057405e-05, | |
| "loss": 0.1026, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.26501766784452296, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 4.45941649528596e-05, | |
| "loss": 0.1017, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.27385159010600707, | |
| "grad_norm": 0.0771484375, | |
| "learning_rate": 4.4158963716757444e-05, | |
| "loss": 0.099, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.2826855123674912, | |
| "grad_norm": 0.07666015625, | |
| "learning_rate": 4.370950941787456e-05, | |
| "loss": 0.1021, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2915194346289753, | |
| "grad_norm": 0.0712890625, | |
| "learning_rate": 4.324618659995855e-05, | |
| "loss": 0.1006, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3003533568904594, | |
| "grad_norm": 0.076171875, | |
| "learning_rate": 4.27693916723734e-05, | |
| "loss": 0.1006, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.30918727915194344, | |
| "grad_norm": 0.07275390625, | |
| "learning_rate": 4.227953257093985e-05, | |
| "loss": 0.0995, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.31802120141342755, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 4.1777028408913985e-05, | |
| "loss": 0.1005, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.32685512367491165, | |
| "grad_norm": 0.07373046875, | |
| "learning_rate": 4.126230911840269e-05, | |
| "loss": 0.0995, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.33568904593639576, | |
| "grad_norm": 0.07666015625, | |
| "learning_rate": 4.07358150825226e-05, | |
| "loss": 0.0984, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.34452296819787986, | |
| "grad_norm": 0.07568359375, | |
| "learning_rate": 4.0197996758617594e-05, | |
| "loss": 0.0979, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.35335689045936397, | |
| "grad_norm": 0.076171875, | |
| "learning_rate": 3.964931429285675e-05, | |
| "loss": 0.0998, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3621908127208481, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 3.909023712654291e-05, | |
| "loss": 0.1012, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.3710247349823322, | |
| "grad_norm": 0.07373046875, | |
| "learning_rate": 3.852124359446845e-05, | |
| "loss": 0.0987, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.37985865724381623, | |
| "grad_norm": 0.0732421875, | |
| "learning_rate": 3.794282051566199e-05, | |
| "loss": 0.0982, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.38869257950530034, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 3.7355462776876184e-05, | |
| "loss": 0.0984, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.39752650176678445, | |
| "grad_norm": 0.06982421875, | |
| "learning_rate": 3.6759672909172846e-05, | |
| "loss": 0.0973, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.40636042402826855, | |
| "grad_norm": 0.07080078125, | |
| "learning_rate": 3.615596065796791e-05, | |
| "loss": 0.1007, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.41519434628975266, | |
| "grad_norm": 0.07421875, | |
| "learning_rate": 3.554484254690379e-05, | |
| "loss": 0.0972, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.42402826855123676, | |
| "grad_norm": 0.07080078125, | |
| "learning_rate": 3.492684143592252e-05, | |
| "loss": 0.0974, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.43286219081272087, | |
| "grad_norm": 0.076171875, | |
| "learning_rate": 3.4302486073917686e-05, | |
| "loss": 0.0991, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.4416961130742049, | |
| "grad_norm": 0.076171875, | |
| "learning_rate": 3.3672310646347844e-05, | |
| "loss": 0.0979, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.450530035335689, | |
| "grad_norm": 0.07275390625, | |
| "learning_rate": 3.3036854318198575e-05, | |
| "loss": 0.0987, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.45936395759717313, | |
| "grad_norm": 0.07470703125, | |
| "learning_rate": 3.2396660772684114e-05, | |
| "loss": 0.0999, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.46819787985865724, | |
| "grad_norm": 0.07373046875, | |
| "learning_rate": 3.1752277746083325e-05, | |
| "loss": 0.0979, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.47703180212014135, | |
| "grad_norm": 0.07373046875, | |
| "learning_rate": 3.110425655910795e-05, | |
| "loss": 0.0983, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.48586572438162545, | |
| "grad_norm": 0.07080078125, | |
| "learning_rate": 3.045315164520405e-05, | |
| "loss": 0.0981, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.49469964664310956, | |
| "grad_norm": 0.07568359375, | |
| "learning_rate": 2.9799520076190268e-05, | |
| "loss": 0.0987, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5035335689045937, | |
| "grad_norm": 0.07080078125, | |
| "learning_rate": 2.914392108563883e-05, | |
| "loss": 0.0963, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.5123674911660777, | |
| "grad_norm": 0.07470703125, | |
| "learning_rate": 2.848691559040687e-05, | |
| "loss": 0.0977, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5212014134275619, | |
| "grad_norm": 0.072265625, | |
| "learning_rate": 2.7829065710727682e-05, | |
| "loss": 0.0959, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.5300353356890459, | |
| "grad_norm": 0.07275390625, | |
| "learning_rate": 2.7170934289272327e-05, | |
| "loss": 0.0983, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5388692579505301, | |
| "grad_norm": 0.07177734375, | |
| "learning_rate": 2.6513084409593137e-05, | |
| "loss": 0.0981, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.5477031802120141, | |
| "grad_norm": 0.07373046875, | |
| "learning_rate": 2.585607891436118e-05, | |
| "loss": 0.0972, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5565371024734982, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 2.5200479923809738e-05, | |
| "loss": 0.0964, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.5653710247349824, | |
| "grad_norm": 0.06787109375, | |
| "learning_rate": 2.4546848354795954e-05, | |
| "loss": 0.0966, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5742049469964664, | |
| "grad_norm": 0.068359375, | |
| "learning_rate": 2.3895743440892053e-05, | |
| "loss": 0.0983, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.5830388692579506, | |
| "grad_norm": 0.0712890625, | |
| "learning_rate": 2.3247722253916677e-05, | |
| "loss": 0.0983, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5918727915194346, | |
| "grad_norm": 0.07177734375, | |
| "learning_rate": 2.2603339227315902e-05, | |
| "loss": 0.0982, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.6007067137809188, | |
| "grad_norm": 0.06787109375, | |
| "learning_rate": 2.1963145681801434e-05, | |
| "loss": 0.0968, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6095406360424028, | |
| "grad_norm": 0.0712890625, | |
| "learning_rate": 2.132768935365215e-05, | |
| "loss": 0.0976, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.6183745583038869, | |
| "grad_norm": 0.0693359375, | |
| "learning_rate": 2.069751392608232e-05, | |
| "loss": 0.0974, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.627208480565371, | |
| "grad_norm": 0.0712890625, | |
| "learning_rate": 2.0073158564077483e-05, | |
| "loss": 0.0992, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.6360424028268551, | |
| "grad_norm": 0.0703125, | |
| "learning_rate": 1.9455157453096225e-05, | |
| "loss": 0.0992, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6448763250883393, | |
| "grad_norm": 0.06884765625, | |
| "learning_rate": 1.8844039342032095e-05, | |
| "loss": 0.0961, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.6537102473498233, | |
| "grad_norm": 0.06884765625, | |
| "learning_rate": 1.8240327090827153e-05, | |
| "loss": 0.097, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6625441696113075, | |
| "grad_norm": 0.068359375, | |
| "learning_rate": 1.764453722312383e-05, | |
| "loss": 0.0979, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.6713780918727915, | |
| "grad_norm": 0.0693359375, | |
| "learning_rate": 1.705717948433801e-05, | |
| "loss": 0.0963, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6802120141342756, | |
| "grad_norm": 0.06787109375, | |
| "learning_rate": 1.6478756405531564e-05, | |
| "loss": 0.0969, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.6890459363957597, | |
| "grad_norm": 0.0673828125, | |
| "learning_rate": 1.5909762873457096e-05, | |
| "loss": 0.0963, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.6978798586572438, | |
| "grad_norm": 0.06787109375, | |
| "learning_rate": 1.5350685707143258e-05, | |
| "loss": 0.0973, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.7067137809187279, | |
| "grad_norm": 0.06591796875, | |
| "learning_rate": 1.4802003241382406e-05, | |
| "loss": 0.0963, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.715547703180212, | |
| "grad_norm": 0.0673828125, | |
| "learning_rate": 1.4264184917477397e-05, | |
| "loss": 0.0964, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.7243816254416962, | |
| "grad_norm": 0.0673828125, | |
| "learning_rate": 1.3737690881597321e-05, | |
| "loss": 0.0981, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7332155477031802, | |
| "grad_norm": 0.068359375, | |
| "learning_rate": 1.3222971591086014e-05, | |
| "loss": 0.0977, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.7420494699646644, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 1.2720467429060156e-05, | |
| "loss": 0.0975, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7508833922261484, | |
| "grad_norm": 0.06787109375, | |
| "learning_rate": 1.2230608327626608e-05, | |
| "loss": 0.0978, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.7597173144876325, | |
| "grad_norm": 0.0712890625, | |
| "learning_rate": 1.1753813400041453e-05, | |
| "loss": 0.0954, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7685512367491166, | |
| "grad_norm": 0.06640625, | |
| "learning_rate": 1.1290490582125454e-05, | |
| "loss": 0.0952, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.7773851590106007, | |
| "grad_norm": 0.068359375, | |
| "learning_rate": 1.0841036283242558e-05, | |
| "loss": 0.0971, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.7862190812720848, | |
| "grad_norm": 0.068359375, | |
| "learning_rate": 1.0405835047140401e-05, | |
| "loss": 0.0982, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.7950530035335689, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 9.985259222942602e-06, | |
| "loss": 0.0952, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.803886925795053, | |
| "grad_norm": 0.0703125, | |
| "learning_rate": 9.57966864657463e-06, | |
| "loss": 0.0978, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.8127208480565371, | |
| "grad_norm": 0.06982421875, | |
| "learning_rate": 9.189410332895662e-06, | |
| "loss": 0.0989, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.8215547703180212, | |
| "grad_norm": 0.06787109375, | |
| "learning_rate": 8.814818178799892e-06, | |
| "loss": 0.0981, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.8303886925795053, | |
| "grad_norm": 0.06591796875, | |
| "learning_rate": 8.456212677541312e-06, | |
| "loss": 0.0945, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8392226148409894, | |
| "grad_norm": 0.0673828125, | |
| "learning_rate": 8.113900644526301e-06, | |
| "loss": 0.0988, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.8480565371024735, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 7.788174954808826e-06, | |
| "loss": 0.0973, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.8568904593639576, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 7.479314292512542e-06, | |
| "loss": 0.0972, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.8657243816254417, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 7.187582912394548e-06, | |
| "loss": 0.0977, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.8745583038869258, | |
| "grad_norm": 0.06591796875, | |
| "learning_rate": 6.913230413754452e-06, | |
| "loss": 0.0962, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.8833922261484098, | |
| "grad_norm": 0.06982421875, | |
| "learning_rate": 6.656491526882422e-06, | |
| "loss": 0.0958, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.892226148409894, | |
| "grad_norm": 0.06494140625, | |
| "learning_rate": 6.417585912228833e-06, | |
| "loss": 0.0959, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.901060070671378, | |
| "grad_norm": 0.0703125, | |
| "learning_rate": 6.196717972467361e-06, | |
| "loss": 0.0978, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9098939929328622, | |
| "grad_norm": 0.0673828125, | |
| "learning_rate": 5.994076677612297e-06, | |
| "loss": 0.0982, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.9187279151943463, | |
| "grad_norm": 0.06640625, | |
| "learning_rate": 5.809835403339747e-06, | |
| "loss": 0.0971, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.9275618374558304, | |
| "grad_norm": 0.06787109375, | |
| "learning_rate": 5.644151782650993e-06, | |
| "loss": 0.0953, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.9363957597173145, | |
| "grad_norm": 0.06982421875, | |
| "learning_rate": 5.497167571004998e-06, | |
| "loss": 0.0956, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.9452296819787986, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 5.36900852503536e-06, | |
| "loss": 0.0963, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.9540636042402827, | |
| "grad_norm": 0.06494140625, | |
| "learning_rate": 5.259784294955576e-06, | |
| "loss": 0.0985, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.9628975265017667, | |
| "grad_norm": 0.06884765625, | |
| "learning_rate": 5.169588330744585e-06, | |
| "loss": 0.0982, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.9717314487632509, | |
| "grad_norm": 0.068359375, | |
| "learning_rate": 5.098497802192923e-06, | |
| "loss": 0.0979, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.980565371024735, | |
| "grad_norm": 0.06591796875, | |
| "learning_rate": 5.046573532877835e-06, | |
| "loss": 0.0977, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.9893992932862191, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 5.013859948123909e-06, | |
| "loss": 0.0976, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.9982332155477032, | |
| "grad_norm": 0.06591796875, | |
| "learning_rate": 5.000385036993684e-06, | |
| "loss": 0.0977, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 566, | |
| "total_flos": 5.828408442271826e+17, | |
| "train_loss": 0.10050818478102819, | |
| "train_runtime": 2227.4528, | |
| "train_samples_per_second": 32.502, | |
| "train_steps_per_second": 0.254 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 566, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.828408442271826e+17, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |