| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.7104, | |
| "global_step": 111000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-06, | |
| "loss": 18.5574, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 14.5225, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.5e-05, | |
| "loss": 9.9891, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2e-05, | |
| "loss": 8.0802, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.5e-05, | |
| "loss": 6.8181, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3e-05, | |
| "loss": 5.7913, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.5e-05, | |
| "loss": 5.0126, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4e-05, | |
| "loss": 4.4555, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.5e-05, | |
| "loss": 4.204, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 5e-05, | |
| "loss": 3.9823, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.996779388083736e-05, | |
| "loss": 3.8565, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.993558776167472e-05, | |
| "loss": 3.7623, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.990338164251208e-05, | |
| "loss": 3.7156, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.987117552334944e-05, | |
| "loss": 3.7255, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.98389694041868e-05, | |
| "loss": 3.6762, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.980676328502415e-05, | |
| "loss": 3.6646, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.977455716586152e-05, | |
| "loss": 3.6359, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.974235104669888e-05, | |
| "loss": 3.5838, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9710144927536237e-05, | |
| "loss": 3.5486, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.967793880837359e-05, | |
| "loss": 3.552, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.964573268921095e-05, | |
| "loss": 3.5218, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9613526570048315e-05, | |
| "loss": 3.5091, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9581320450885674e-05, | |
| "loss": 3.4777, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9549114331723027e-05, | |
| "loss": 3.4543, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9516908212560386e-05, | |
| "loss": 3.4746, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.948470209339775e-05, | |
| "loss": 3.4278, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9452495974235105e-05, | |
| "loss": 3.4421, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9420289855072464e-05, | |
| "loss": 3.4384, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.938808373590982e-05, | |
| "loss": 3.4251, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.935587761674719e-05, | |
| "loss": 3.4479, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.932367149758454e-05, | |
| "loss": 3.4268, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.92914653784219e-05, | |
| "loss": 3.4272, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.925925925925926e-05, | |
| "loss": 3.4173, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.922705314009662e-05, | |
| "loss": 3.3999, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.919484702093398e-05, | |
| "loss": 3.3821, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.916264090177134e-05, | |
| "loss": 3.3931, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.91304347826087e-05, | |
| "loss": 3.3668, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.909822866344606e-05, | |
| "loss": 3.3695, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.906602254428342e-05, | |
| "loss": 3.3364, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9033816425120776e-05, | |
| "loss": 3.3646, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9001610305958136e-05, | |
| "loss": 3.3401, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.8969404186795495e-05, | |
| "loss": 3.3406, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.893719806763285e-05, | |
| "loss": 3.3333, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.8904991948470214e-05, | |
| "loss": 3.3035, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.887278582930757e-05, | |
| "loss": 3.3023, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.884057971014493e-05, | |
| "loss": 3.3023, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.8808373590982285e-05, | |
| "loss": 3.283, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.877616747181965e-05, | |
| "loss": 3.3129, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.874396135265701e-05, | |
| "loss": 3.3093, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.871175523349436e-05, | |
| "loss": 3.3007, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.867954911433172e-05, | |
| "loss": 3.2911, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.864734299516908e-05, | |
| "loss": 3.2856, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.861513687600645e-05, | |
| "loss": 3.2684, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.85829307568438e-05, | |
| "loss": 3.2817, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.855072463768116e-05, | |
| "loss": 3.2531, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.851851851851852e-05, | |
| "loss": 3.2607, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.8486312399355885e-05, | |
| "loss": 3.2418, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.845410628019324e-05, | |
| "loss": 3.2464, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.84219001610306e-05, | |
| "loss": 3.2382, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.8389694041867956e-05, | |
| "loss": 3.2433, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.8357487922705316e-05, | |
| "loss": 3.2448, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.8325281803542675e-05, | |
| "loss": 3.2538, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.8293075684380035e-05, | |
| "loss": 3.2296, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.8260869565217394e-05, | |
| "loss": 3.2306, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.822866344605475e-05, | |
| "loss": 3.2351, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.819645732689211e-05, | |
| "loss": 3.2291, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.816425120772947e-05, | |
| "loss": 3.2215, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.813204508856683e-05, | |
| "loss": 3.1971, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.809983896940419e-05, | |
| "loss": 3.2214, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.806763285024155e-05, | |
| "loss": 3.2143, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.803542673107891e-05, | |
| "loss": 3.1983, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.800322061191627e-05, | |
| "loss": 3.185, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.797101449275362e-05, | |
| "loss": 3.2202, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.793880837359098e-05, | |
| "loss": 3.2021, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.790660225442835e-05, | |
| "loss": 3.1941, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.7874396135265706e-05, | |
| "loss": 3.1897, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.784219001610306e-05, | |
| "loss": 3.2499, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.780998389694042e-05, | |
| "loss": 3.2734, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.7777777777777784e-05, | |
| "loss": 3.2337, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.7745571658615143e-05, | |
| "loss": 3.2304, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.7713365539452496e-05, | |
| "loss": 3.2397, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.7681159420289855e-05, | |
| "loss": 3.249, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.7648953301127215e-05, | |
| "loss": 3.2328, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.7616747181964574e-05, | |
| "loss": 3.2263, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.7584541062801933e-05, | |
| "loss": 3.235, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.755233494363929e-05, | |
| "loss": 3.2242, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.752012882447665e-05, | |
| "loss": 3.2352, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.748792270531401e-05, | |
| "loss": 3.2354, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.745571658615137e-05, | |
| "loss": 3.2433, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.742351046698873e-05, | |
| "loss": 3.2147, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.739130434782609e-05, | |
| "loss": 3.2196, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.735909822866345e-05, | |
| "loss": 3.2019, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.732689210950081e-05, | |
| "loss": 3.2058, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.729468599033817e-05, | |
| "loss": 3.2295, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.726247987117553e-05, | |
| "loss": 3.2247, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.723027375201288e-05, | |
| "loss": 3.2097, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.7198067632850246e-05, | |
| "loss": 3.1852, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.7165861513687605e-05, | |
| "loss": 3.1796, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.7133655394524964e-05, | |
| "loss": 3.2026, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.710144927536232e-05, | |
| "loss": 3.183, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.706924315619968e-05, | |
| "loss": 3.2071, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.703703703703704e-05, | |
| "loss": 3.195, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.7004830917874395e-05, | |
| "loss": 3.2103, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.6972624798711754e-05, | |
| "loss": 3.1835, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.6940418679549114e-05, | |
| "loss": 3.1994, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.690821256038648e-05, | |
| "loss": 3.2085, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.687600644122383e-05, | |
| "loss": 3.1969, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.684380032206119e-05, | |
| "loss": 3.1746, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.681159420289855e-05, | |
| "loss": 3.1597, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.677938808373592e-05, | |
| "loss": 3.229, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.674718196457327e-05, | |
| "loss": 3.2663, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.671497584541063e-05, | |
| "loss": 3.2271, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.668276972624799e-05, | |
| "loss": 3.2445, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.665056360708535e-05, | |
| "loss": 3.1908, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.661835748792271e-05, | |
| "loss": 3.2358, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.6586151368760067e-05, | |
| "loss": 3.2506, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.6553945249597426e-05, | |
| "loss": 3.2306, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.6521739130434785e-05, | |
| "loss": 3.219, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.6489533011272145e-05, | |
| "loss": 3.2098, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.6457326892109504e-05, | |
| "loss": 3.2448, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.642512077294686e-05, | |
| "loss": 3.2451, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.639291465378422e-05, | |
| "loss": 3.2093, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.636070853462158e-05, | |
| "loss": 3.2084, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.632850241545894e-05, | |
| "loss": 3.2344, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.62962962962963e-05, | |
| "loss": 3.197, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.626409017713365e-05, | |
| "loss": 3.2215, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.623188405797101e-05, | |
| "loss": 3.2132, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.619967793880838e-05, | |
| "loss": 3.212, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.616747181964574e-05, | |
| "loss": 3.2082, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.613526570048309e-05, | |
| "loss": 3.18, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.610305958132045e-05, | |
| "loss": 3.2078, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.6070853462157816e-05, | |
| "loss": 3.2154, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.6038647342995176e-05, | |
| "loss": 3.1944, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.600644122383253e-05, | |
| "loss": 3.1892, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.597423510466989e-05, | |
| "loss": 3.2126, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.594202898550725e-05, | |
| "loss": 3.1812, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.5909822866344606e-05, | |
| "loss": 3.1947, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.5877616747181966e-05, | |
| "loss": 3.1581, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.5845410628019325e-05, | |
| "loss": 3.1775, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.5813204508856684e-05, | |
| "loss": 3.1925, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.5780998389694044e-05, | |
| "loss": 3.2015, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.57487922705314e-05, | |
| "loss": 3.2129, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.571658615136876e-05, | |
| "loss": 3.1679, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.568438003220612e-05, | |
| "loss": 3.1762, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.565217391304348e-05, | |
| "loss": 3.1758, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.561996779388084e-05, | |
| "loss": 3.1934, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.55877616747182e-05, | |
| "loss": 3.1869, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.555555555555556e-05, | |
| "loss": 3.1614, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.552334943639291e-05, | |
| "loss": 3.1731, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.549114331723028e-05, | |
| "loss": 3.1559, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.545893719806764e-05, | |
| "loss": 3.1907, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.5426731078904997e-05, | |
| "loss": 3.1697, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.539452495974235e-05, | |
| "loss": 3.1775, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.5362318840579715e-05, | |
| "loss": 3.1583, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.5330112721417075e-05, | |
| "loss": 3.1548, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.5297906602254434e-05, | |
| "loss": 3.1547, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.5265700483091786e-05, | |
| "loss": 3.1595, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.5233494363929146e-05, | |
| "loss": 3.1311, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.520128824476651e-05, | |
| "loss": 3.1494, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.5169082125603865e-05, | |
| "loss": 3.1271, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.5136876006441224e-05, | |
| "loss": 3.13, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.510466988727858e-05, | |
| "loss": 3.1193, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.507246376811595e-05, | |
| "loss": 3.1095, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.50402576489533e-05, | |
| "loss": 3.1219, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.500805152979066e-05, | |
| "loss": 3.1476, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.497584541062802e-05, | |
| "loss": 3.1189, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.494363929146538e-05, | |
| "loss": 3.1486, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.491143317230274e-05, | |
| "loss": 3.1355, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.48792270531401e-05, | |
| "loss": 3.1198, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.484702093397746e-05, | |
| "loss": 3.122, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.481481481481482e-05, | |
| "loss": 3.1181, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.478260869565218e-05, | |
| "loss": 3.1137, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.4750402576489536e-05, | |
| "loss": 3.1287, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.4718196457326895e-05, | |
| "loss": 3.1159, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.4685990338164255e-05, | |
| "loss": 3.0879, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.465378421900161e-05, | |
| "loss": 3.1141, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.4621578099838974e-05, | |
| "loss": 3.1248, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.458937198067633e-05, | |
| "loss": 3.1182, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.455716586151369e-05, | |
| "loss": 3.1048, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.4524959742351045e-05, | |
| "loss": 3.1278, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.449275362318841e-05, | |
| "loss": 3.0932, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.446054750402577e-05, | |
| "loss": 3.0845, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.442834138486312e-05, | |
| "loss": 3.0972, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.439613526570048e-05, | |
| "loss": 3.1083, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.436392914653785e-05, | |
| "loss": 3.1304, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.433172302737521e-05, | |
| "loss": 3.1046, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.429951690821256e-05, | |
| "loss": 3.0871, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.426731078904992e-05, | |
| "loss": 3.0369, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.423510466988728e-05, | |
| "loss": 3.039, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.4202898550724645e-05, | |
| "loss": 3.0397, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.4170692431562e-05, | |
| "loss": 3.0393, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.413848631239936e-05, | |
| "loss": 3.0536, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.4106280193236716e-05, | |
| "loss": 3.0447, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.4074074074074076e-05, | |
| "loss": 3.0601, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.4041867954911435e-05, | |
| "loss": 3.0573, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.4009661835748794e-05, | |
| "loss": 3.0309, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.3977455716586154e-05, | |
| "loss": 3.0176, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.394524959742351e-05, | |
| "loss": 3.0338, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.391304347826087e-05, | |
| "loss": 3.0341, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.388083735909823e-05, | |
| "loss": 3.0642, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.384863123993559e-05, | |
| "loss": 3.0159, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.3816425120772944e-05, | |
| "loss": 3.0312, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.378421900161031e-05, | |
| "loss": 3.0389, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.375201288244767e-05, | |
| "loss": 3.0185, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.371980676328503e-05, | |
| "loss": 3.0288, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.368760064412238e-05, | |
| "loss": 3.0237, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.365539452495974e-05, | |
| "loss": 3.0061, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.362318840579711e-05, | |
| "loss": 3.0291, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.3590982286634466e-05, | |
| "loss": 3.0224, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.355877616747182e-05, | |
| "loss": 3.036, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.352657004830918e-05, | |
| "loss": 3.0331, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.3494363929146544e-05, | |
| "loss": 3.031, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.34621578099839e-05, | |
| "loss": 3.0111, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.3429951690821256e-05, | |
| "loss": 3.0262, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.3397745571658615e-05, | |
| "loss": 3.0206, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.336553945249598e-05, | |
| "loss": 3.0247, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.3333333333333334e-05, | |
| "loss": 3.0193, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.3301127214170693e-05, | |
| "loss": 3.0191, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.326892109500805e-05, | |
| "loss": 3.0077, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.323671497584541e-05, | |
| "loss": 3.0137, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.320450885668277e-05, | |
| "loss": 3.0214, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.317230273752013e-05, | |
| "loss": 3.0309, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.314009661835749e-05, | |
| "loss": 3.0105, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.310789049919485e-05, | |
| "loss": 3.0167, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.307568438003221e-05, | |
| "loss": 3.0264, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.304347826086957e-05, | |
| "loss": 3.0052, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.301127214170693e-05, | |
| "loss": 3.0045, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.297906602254429e-05, | |
| "loss": 2.9984, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.294685990338164e-05, | |
| "loss": 3.0421, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.2914653784219006e-05, | |
| "loss": 3.0304, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.2882447665056365e-05, | |
| "loss": 2.9881, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.2850241545893724e-05, | |
| "loss": 3.0141, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.281803542673108e-05, | |
| "loss": 3.0256, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.278582930756844e-05, | |
| "loss": 3.0171, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.27536231884058e-05, | |
| "loss": 3.0125, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.2721417069243155e-05, | |
| "loss": 2.9943, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.2689210950080514e-05, | |
| "loss": 3.0275, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.2657004830917874e-05, | |
| "loss": 3.0172, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.262479871175524e-05, | |
| "loss": 3.026, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.259259259259259e-05, | |
| "loss": 2.9931, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.256038647342995e-05, | |
| "loss": 2.9838, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.252818035426731e-05, | |
| "loss": 3.0091, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.249597423510468e-05, | |
| "loss": 3.0074, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.246376811594203e-05, | |
| "loss": 3.0174, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.243156199677939e-05, | |
| "loss": 3.0061, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.239935587761675e-05, | |
| "loss": 3.0047, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.236714975845411e-05, | |
| "loss": 3.0222, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.233494363929147e-05, | |
| "loss": 2.9972, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.2302737520128827e-05, | |
| "loss": 2.9867, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.2270531400966186e-05, | |
| "loss": 2.9991, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.2238325281803545e-05, | |
| "loss": 3.0085, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.2206119162640905e-05, | |
| "loss": 3.0198, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.2173913043478264e-05, | |
| "loss": 3.0023, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.214170692431562e-05, | |
| "loss": 3.0037, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.210950080515298e-05, | |
| "loss": 3.0127, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.207729468599034e-05, | |
| "loss": 2.9961, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.20450885668277e-05, | |
| "loss": 2.9947, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.201288244766506e-05, | |
| "loss": 2.9962, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.198067632850241e-05, | |
| "loss": 2.9909, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.194847020933977e-05, | |
| "loss": 3.0012, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.191626409017714e-05, | |
| "loss": 3.0109, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.18840579710145e-05, | |
| "loss": 2.9798, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.185185185185185e-05, | |
| "loss": 2.9892, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.181964573268921e-05, | |
| "loss": 3.009, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.1787439613526576e-05, | |
| "loss": 2.9882, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.1755233494363936e-05, | |
| "loss": 2.9866, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.172302737520129e-05, | |
| "loss": 2.996, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.169082125603865e-05, | |
| "loss": 2.9869, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.165861513687601e-05, | |
| "loss": 2.9803, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.1626409017713366e-05, | |
| "loss": 2.9795, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.1594202898550726e-05, | |
| "loss": 2.9647, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.1561996779388085e-05, | |
| "loss": 2.9798, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.1529790660225444e-05, | |
| "loss": 2.9796, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.1497584541062804e-05, | |
| "loss": 3.0083, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.146537842190016e-05, | |
| "loss": 2.9906, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.143317230273752e-05, | |
| "loss": 2.9976, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.140096618357488e-05, | |
| "loss": 2.9938, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.136876006441224e-05, | |
| "loss": 2.9755, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.13365539452496e-05, | |
| "loss": 2.9946, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.130434782608696e-05, | |
| "loss": 2.9822, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.127214170692432e-05, | |
| "loss": 2.9832, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.123993558776167e-05, | |
| "loss": 2.9882, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.120772946859904e-05, | |
| "loss": 2.9852, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.11755233494364e-05, | |
| "loss": 2.9715, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.1143317230273756e-05, | |
| "loss": 2.9766, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.111111111111111e-05, | |
| "loss": 2.9918, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.1078904991948475e-05, | |
| "loss": 2.987, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.1046698872785834e-05, | |
| "loss": 3.0042, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.101449275362319e-05, | |
| "loss": 2.9618, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.0982286634460546e-05, | |
| "loss": 2.9574, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.0950080515297906e-05, | |
| "loss": 2.9725, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.091787439613527e-05, | |
| "loss": 2.9744, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.0885668276972624e-05, | |
| "loss": 2.9626, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.0853462157809984e-05, | |
| "loss": 2.9696, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.082125603864734e-05, | |
| "loss": 2.9656, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.078904991948471e-05, | |
| "loss": 2.9799, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.075684380032206e-05, | |
| "loss": 2.967, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.072463768115942e-05, | |
| "loss": 2.9708, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.069243156199678e-05, | |
| "loss": 2.9802, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.066022544283414e-05, | |
| "loss": 2.9924, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.06280193236715e-05, | |
| "loss": 2.9763, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.059581320450886e-05, | |
| "loss": 2.9725, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.056360708534622e-05, | |
| "loss": 2.9613, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.053140096618358e-05, | |
| "loss": 2.9905, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.049919484702094e-05, | |
| "loss": 2.983, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.0466988727858296e-05, | |
| "loss": 2.9856, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.0434782608695655e-05, | |
| "loss": 2.9911, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.0402576489533015e-05, | |
| "loss": 2.9511, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.0370370370370374e-05, | |
| "loss": 2.9594, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.0338164251207733e-05, | |
| "loss": 2.9881, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.030595813204509e-05, | |
| "loss": 2.9808, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.0273752012882445e-05, | |
| "loss": 2.9767, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.0241545893719805e-05, | |
| "loss": 2.9637, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.020933977455717e-05, | |
| "loss": 2.97, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.017713365539453e-05, | |
| "loss": 2.9831, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.014492753623188e-05, | |
| "loss": 2.9591, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.011272141706924e-05, | |
| "loss": 2.9677, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.008051529790661e-05, | |
| "loss": 2.9603, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.004830917874397e-05, | |
| "loss": 2.9792, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.001610305958132e-05, | |
| "loss": 2.9638, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.998389694041868e-05, | |
| "loss": 2.9877, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.995169082125604e-05, | |
| "loss": 2.9701, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.99194847020934e-05, | |
| "loss": 2.9743, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.988727858293076e-05, | |
| "loss": 2.9525, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.985507246376812e-05, | |
| "loss": 2.9712, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.9822866344605476e-05, | |
| "loss": 2.9849, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.9790660225442836e-05, | |
| "loss": 2.9793, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.9758454106280195e-05, | |
| "loss": 2.962, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.9726247987117554e-05, | |
| "loss": 2.9341, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.9694041867954914e-05, | |
| "loss": 2.9616, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.966183574879227e-05, | |
| "loss": 2.9624, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.962962962962963e-05, | |
| "loss": 2.9524, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.959742351046699e-05, | |
| "loss": 2.9496, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.956521739130435e-05, | |
| "loss": 2.946, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.9533011272141704e-05, | |
| "loss": 2.9893, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.950080515297907e-05, | |
| "loss": 2.9673, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.946859903381643e-05, | |
| "loss": 2.9602, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.943639291465379e-05, | |
| "loss": 2.9296, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.940418679549114e-05, | |
| "loss": 2.9511, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.937198067632851e-05, | |
| "loss": 2.9673, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.9339774557165867e-05, | |
| "loss": 2.9396, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.9307568438003226e-05, | |
| "loss": 2.964, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.927536231884058e-05, | |
| "loss": 2.9424, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.924315619967794e-05, | |
| "loss": 2.9581, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.9210950080515304e-05, | |
| "loss": 2.9764, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.9178743961352657e-05, | |
| "loss": 2.9569, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.9146537842190016e-05, | |
| "loss": 2.9493, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.9114331723027375e-05, | |
| "loss": 2.9481, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.908212560386474e-05, | |
| "loss": 2.9708, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.9049919484702094e-05, | |
| "loss": 2.961, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.901771336553945e-05, | |
| "loss": 2.9671, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.898550724637681e-05, | |
| "loss": 2.9513, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.895330112721417e-05, | |
| "loss": 2.9627, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.892109500805153e-05, | |
| "loss": 2.9801, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 2.9321, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.885668276972625e-05, | |
| "loss": 2.9341, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.882447665056361e-05, | |
| "loss": 2.9564, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.879227053140097e-05, | |
| "loss": 2.9457, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.876006441223833e-05, | |
| "loss": 2.9391, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.872785829307569e-05, | |
| "loss": 2.9653, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.869565217391305e-05, | |
| "loss": 2.9586, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.86634460547504e-05, | |
| "loss": 2.9318, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.8631239935587766e-05, | |
| "loss": 2.9638, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.8599033816425125e-05, | |
| "loss": 2.9298, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.8566827697262484e-05, | |
| "loss": 2.9582, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.853462157809984e-05, | |
| "loss": 2.9551, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.85024154589372e-05, | |
| "loss": 2.9413, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.847020933977456e-05, | |
| "loss": 2.9451, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.8438003220611915e-05, | |
| "loss": 2.9283, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.8405797101449274e-05, | |
| "loss": 2.9442, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.837359098228664e-05, | |
| "loss": 2.9317, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.8341384863124e-05, | |
| "loss": 2.9502, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.830917874396135e-05, | |
| "loss": 2.9569, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.827697262479871e-05, | |
| "loss": 2.9578, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.824476650563607e-05, | |
| "loss": 2.9217, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.821256038647344e-05, | |
| "loss": 2.892, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.818035426731079e-05, | |
| "loss": 2.9126, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.814814814814815e-05, | |
| "loss": 2.9241, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.811594202898551e-05, | |
| "loss": 2.9173, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.808373590982287e-05, | |
| "loss": 2.9392, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.805152979066023e-05, | |
| "loss": 2.9118, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.8019323671497586e-05, | |
| "loss": 2.9359, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.7987117552334946e-05, | |
| "loss": 2.9353, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.7954911433172305e-05, | |
| "loss": 2.9162, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.7922705314009665e-05, | |
| "loss": 2.9191, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.7890499194847024e-05, | |
| "loss": 2.9109, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.785829307568438e-05, | |
| "loss": 2.9269, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.7826086956521736e-05, | |
| "loss": 2.9294, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.77938808373591e-05, | |
| "loss": 2.9212, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.776167471819646e-05, | |
| "loss": 2.896, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.772946859903382e-05, | |
| "loss": 2.9256, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.769726247987117e-05, | |
| "loss": 2.9094, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.766505636070853e-05, | |
| "loss": 2.9109, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.76328502415459e-05, | |
| "loss": 2.93, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.760064412238326e-05, | |
| "loss": 2.8969, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.756843800322061e-05, | |
| "loss": 2.8838, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.753623188405797e-05, | |
| "loss": 2.9289, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.7504025764895336e-05, | |
| "loss": 2.9072, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.747181964573269e-05, | |
| "loss": 2.885, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.743961352657005e-05, | |
| "loss": 2.9021, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.740740740740741e-05, | |
| "loss": 2.9029, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.737520128824477e-05, | |
| "loss": 2.9137, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.7342995169082126e-05, | |
| "loss": 2.9391, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.7310789049919485e-05, | |
| "loss": 2.8991, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.7278582930756845e-05, | |
| "loss": 2.9151, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.7246376811594204e-05, | |
| "loss": 2.9185, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.7214170692431564e-05, | |
| "loss": 2.9046, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.718196457326892e-05, | |
| "loss": 2.9137, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.714975845410628e-05, | |
| "loss": 2.9184, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.711755233494364e-05, | |
| "loss": 2.9086, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.7085346215781e-05, | |
| "loss": 2.889, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.705314009661836e-05, | |
| "loss": 2.9011, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.702093397745572e-05, | |
| "loss": 2.9011, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.698872785829308e-05, | |
| "loss": 2.9259, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.695652173913043e-05, | |
| "loss": 2.9241, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.69243156199678e-05, | |
| "loss": 2.9135, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.689210950080516e-05, | |
| "loss": 2.8876, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6859903381642516e-05, | |
| "loss": 2.8951, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.682769726247987e-05, | |
| "loss": 2.8864, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6795491143317235e-05, | |
| "loss": 2.9185, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6763285024154594e-05, | |
| "loss": 2.9086, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.673107890499195e-05, | |
| "loss": 2.8971, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6698872785829306e-05, | |
| "loss": 2.9113, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 2.9018, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.663446054750403e-05, | |
| "loss": 2.9114, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6602254428341384e-05, | |
| "loss": 2.9079, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6570048309178744e-05, | |
| "loss": 2.9002, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.65378421900161e-05, | |
| "loss": 2.895, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.650563607085347e-05, | |
| "loss": 2.9185, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.647342995169082e-05, | |
| "loss": 2.9129, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.644122383252818e-05, | |
| "loss": 2.9062, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.640901771336554e-05, | |
| "loss": 2.922, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.63768115942029e-05, | |
| "loss": 2.8951, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.634460547504026e-05, | |
| "loss": 2.8875, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.631239935587762e-05, | |
| "loss": 2.9232, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.628019323671498e-05, | |
| "loss": 2.8988, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.624798711755234e-05, | |
| "loss": 2.8981, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.62157809983897e-05, | |
| "loss": 2.9191, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.6183574879227056e-05, | |
| "loss": 2.913, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.6151368760064415e-05, | |
| "loss": 2.8866, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.6119162640901775e-05, | |
| "loss": 2.9148, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.6086956521739134e-05, | |
| "loss": 2.8952, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.6054750402576493e-05, | |
| "loss": 2.9016, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.602254428341385e-05, | |
| "loss": 2.8907, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.5990338164251205e-05, | |
| "loss": 2.9131, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.5958132045088565e-05, | |
| "loss": 2.9062, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.592592592592593e-05, | |
| "loss": 2.8906, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.589371980676329e-05, | |
| "loss": 2.9001, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.586151368760064e-05, | |
| "loss": 2.9017, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.5829307568438e-05, | |
| "loss": 2.8877, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.579710144927537e-05, | |
| "loss": 2.9001, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.576489533011273e-05, | |
| "loss": 2.8881, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.573268921095008e-05, | |
| "loss": 2.9095, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.570048309178744e-05, | |
| "loss": 2.8986, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.56682769726248e-05, | |
| "loss": 2.8943, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.563607085346216e-05, | |
| "loss": 2.9075, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.560386473429952e-05, | |
| "loss": 2.8839, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.557165861513688e-05, | |
| "loss": 2.8898, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.5539452495974236e-05, | |
| "loss": 2.8924, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.5507246376811596e-05, | |
| "loss": 2.9121, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.5475040257648955e-05, | |
| "loss": 2.8874, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.5442834138486314e-05, | |
| "loss": 2.8974, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.5410628019323674e-05, | |
| "loss": 2.8805, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.5378421900161026e-05, | |
| "loss": 2.9021, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.534621578099839e-05, | |
| "loss": 2.9019, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.531400966183575e-05, | |
| "loss": 2.9134, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.528180354267311e-05, | |
| "loss": 2.8879, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.5249597423510464e-05, | |
| "loss": 2.8921, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.521739130434783e-05, | |
| "loss": 2.8898, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.518518518518519e-05, | |
| "loss": 2.9093, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.515297906602255e-05, | |
| "loss": 2.914, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.51207729468599e-05, | |
| "loss": 2.8932, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.508856682769727e-05, | |
| "loss": 2.8996, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.5056360708534627e-05, | |
| "loss": 2.9115, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.502415458937198e-05, | |
| "loss": 2.9049, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.499194847020934e-05, | |
| "loss": 2.8781, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.49597423510467e-05, | |
| "loss": 2.8964, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.4927536231884064e-05, | |
| "loss": 2.8742, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.4895330112721417e-05, | |
| "loss": 2.906, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.4863123993558776e-05, | |
| "loss": 2.8951, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.4830917874396135e-05, | |
| "loss": 2.917, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.47987117552335e-05, | |
| "loss": 2.9107, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.4766505636070854e-05, | |
| "loss": 2.8872, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.473429951690821e-05, | |
| "loss": 2.899, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.470209339774557e-05, | |
| "loss": 2.8786, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.466988727858293e-05, | |
| "loss": 2.8938, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.463768115942029e-05, | |
| "loss": 2.8837, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.460547504025765e-05, | |
| "loss": 2.8983, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.457326892109501e-05, | |
| "loss": 2.8886, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.454106280193237e-05, | |
| "loss": 2.8936, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.450885668276973e-05, | |
| "loss": 2.8828, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.447665056360709e-05, | |
| "loss": 2.9086, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.444444444444445e-05, | |
| "loss": 2.8953, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.441223832528181e-05, | |
| "loss": 2.8772, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.438003220611916e-05, | |
| "loss": 2.9057, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.4347826086956526e-05, | |
| "loss": 2.903, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.4315619967793885e-05, | |
| "loss": 2.8807, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.428341384863124e-05, | |
| "loss": 2.8745, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.42512077294686e-05, | |
| "loss": 2.901, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.421900161030596e-05, | |
| "loss": 2.8933, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.418679549114332e-05, | |
| "loss": 2.8896, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.4154589371980675e-05, | |
| "loss": 2.9235, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.4122383252818034e-05, | |
| "loss": 2.8754, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.40901771336554e-05, | |
| "loss": 2.8787, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.405797101449276e-05, | |
| "loss": 2.8914, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.402576489533011e-05, | |
| "loss": 2.8997, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.399355877616747e-05, | |
| "loss": 2.8955, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.396135265700483e-05, | |
| "loss": 2.8822, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.392914653784219e-05, | |
| "loss": 2.8839, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.389694041867955e-05, | |
| "loss": 2.8913, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.386473429951691e-05, | |
| "loss": 2.8911, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.383252818035427e-05, | |
| "loss": 2.8872, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.380032206119163e-05, | |
| "loss": 2.8705, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.376811594202899e-05, | |
| "loss": 2.8754, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.3735909822866346e-05, | |
| "loss": 2.892, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.3703703703703706e-05, | |
| "loss": 2.8805, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.3671497584541065e-05, | |
| "loss": 2.8894, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.3639291465378424e-05, | |
| "loss": 2.8863, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.3607085346215784e-05, | |
| "loss": 2.8943, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.357487922705314e-05, | |
| "loss": 2.8963, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.3542673107890496e-05, | |
| "loss": 2.8789, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.351046698872786e-05, | |
| "loss": 2.8709, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.347826086956522e-05, | |
| "loss": 2.8862, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.344605475040258e-05, | |
| "loss": 2.8959, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.341384863123993e-05, | |
| "loss": 2.8716, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.338164251207729e-05, | |
| "loss": 2.8774, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.334943639291466e-05, | |
| "loss": 2.8918, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.331723027375202e-05, | |
| "loss": 2.8662, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.328502415458937e-05, | |
| "loss": 2.8855, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.325281803542673e-05, | |
| "loss": 2.8963, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.3220611916264096e-05, | |
| "loss": 2.8666, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.318840579710145e-05, | |
| "loss": 2.896, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.315619967793881e-05, | |
| "loss": 2.8793, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.312399355877617e-05, | |
| "loss": 2.8844, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.3091787439613533e-05, | |
| "loss": 2.874, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.3059581320450886e-05, | |
| "loss": 2.8927, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.3027375201288245e-05, | |
| "loss": 2.911, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.2995169082125605e-05, | |
| "loss": 2.8625, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.2962962962962964e-05, | |
| "loss": 2.8974, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.2930756843800323e-05, | |
| "loss": 2.8925, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.289855072463768e-05, | |
| "loss": 2.8905, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.286634460547504e-05, | |
| "loss": 2.8759, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.28341384863124e-05, | |
| "loss": 2.8914, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.280193236714976e-05, | |
| "loss": 2.891, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.276972624798712e-05, | |
| "loss": 2.8682, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.273752012882448e-05, | |
| "loss": 2.8648, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.270531400966184e-05, | |
| "loss": 2.8762, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.267310789049919e-05, | |
| "loss": 2.8824, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.264090177133656e-05, | |
| "loss": 2.8674, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.260869565217392e-05, | |
| "loss": 2.875, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.2576489533011276e-05, | |
| "loss": 2.8818, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.254428341384863e-05, | |
| "loss": 2.8724, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.2512077294685995e-05, | |
| "loss": 2.8942, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.2479871175523354e-05, | |
| "loss": 2.8675, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.244766505636071e-05, | |
| "loss": 2.8838, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.2415458937198066e-05, | |
| "loss": 2.8721, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.2383252818035426e-05, | |
| "loss": 2.8693, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.235104669887279e-05, | |
| "loss": 2.89, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.2318840579710144e-05, | |
| "loss": 2.8639, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.2286634460547504e-05, | |
| "loss": 2.8768, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.225442834138486e-05, | |
| "loss": 2.8708, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.222222222222223e-05, | |
| "loss": 2.8661, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.219001610305958e-05, | |
| "loss": 2.8472, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.215780998389694e-05, | |
| "loss": 2.866, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.21256038647343e-05, | |
| "loss": 2.8708, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.209339774557166e-05, | |
| "loss": 2.8581, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.206119162640902e-05, | |
| "loss": 2.8434, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.202898550724638e-05, | |
| "loss": 2.8684, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.199677938808374e-05, | |
| "loss": 2.8552, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.19645732689211e-05, | |
| "loss": 2.8626, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.1932367149758457e-05, | |
| "loss": 2.8462, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.1900161030595816e-05, | |
| "loss": 2.8377, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.1867954911433175e-05, | |
| "loss": 2.8687, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.183574879227053e-05, | |
| "loss": 2.8567, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.1803542673107894e-05, | |
| "loss": 2.8509, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.177133655394525e-05, | |
| "loss": 2.8479, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.173913043478261e-05, | |
| "loss": 2.8708, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.1706924315619965e-05, | |
| "loss": 2.8539, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.1674718196457325e-05, | |
| "loss": 2.8535, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.164251207729469e-05, | |
| "loss": 2.8481, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.161030595813205e-05, | |
| "loss": 2.8738, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.15780998389694e-05, | |
| "loss": 2.8605, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.154589371980676e-05, | |
| "loss": 2.837, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.151368760064413e-05, | |
| "loss": 2.8444, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.148148148148148e-05, | |
| "loss": 2.8361, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.144927536231884e-05, | |
| "loss": 2.8451, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.14170692431562e-05, | |
| "loss": 2.8295, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.138486312399356e-05, | |
| "loss": 2.8328, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.135265700483092e-05, | |
| "loss": 2.8572, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.132045088566828e-05, | |
| "loss": 2.8473, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.128824476650564e-05, | |
| "loss": 2.8604, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.1256038647342996e-05, | |
| "loss": 2.8504, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.1223832528180356e-05, | |
| "loss": 2.827, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.1191626409017715e-05, | |
| "loss": 2.8416, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.1159420289855074e-05, | |
| "loss": 2.8461, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.1127214170692434e-05, | |
| "loss": 2.8523, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.109500805152979e-05, | |
| "loss": 2.8445, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.106280193236715e-05, | |
| "loss": 2.8598, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.103059581320451e-05, | |
| "loss": 2.8545, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.099838969404187e-05, | |
| "loss": 2.8377, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.0966183574879224e-05, | |
| "loss": 2.8577, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.093397745571659e-05, | |
| "loss": 2.8576, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.090177133655395e-05, | |
| "loss": 2.8384, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.086956521739131e-05, | |
| "loss": 2.8546, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.083735909822866e-05, | |
| "loss": 2.8612, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.080515297906603e-05, | |
| "loss": 2.8665, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0772946859903386e-05, | |
| "loss": 2.842, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.074074074074074e-05, | |
| "loss": 2.8384, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.07085346215781e-05, | |
| "loss": 2.8484, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.067632850241546e-05, | |
| "loss": 2.8333, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0644122383252824e-05, | |
| "loss": 2.8461, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0611916264090176e-05, | |
| "loss": 2.8529, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0579710144927536e-05, | |
| "loss": 2.8303, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0547504025764895e-05, | |
| "loss": 2.8382, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0515297906602258e-05, | |
| "loss": 2.8208, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0483091787439617e-05, | |
| "loss": 2.8559, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0450885668276973e-05, | |
| "loss": 2.8508, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.0418679549114333e-05, | |
| "loss": 2.84, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.038647342995169e-05, | |
| "loss": 2.8386, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.035426731078905e-05, | |
| "loss": 2.8582, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.032206119162641e-05, | |
| "loss": 2.8465, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.028985507246377e-05, | |
| "loss": 2.8268, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.0257648953301126e-05, | |
| "loss": 2.8347, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.022544283413849e-05, | |
| "loss": 2.8303, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.0193236714975848e-05, | |
| "loss": 2.874, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.0161030595813204e-05, | |
| "loss": 2.8326, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.0128824476650563e-05, | |
| "loss": 2.8549, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.0096618357487926e-05, | |
| "loss": 2.8465, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.0064412238325285e-05, | |
| "loss": 2.8642, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.003220611916264e-05, | |
| "loss": 2.8433, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3e-05, | |
| "loss": 2.8583, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 2.9967793880837357e-05, | |
| "loss": 2.8494, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9935587761674723e-05, | |
| "loss": 2.836, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.990338164251208e-05, | |
| "loss": 2.8494, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9871175523349438e-05, | |
| "loss": 2.8466, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9838969404186794e-05, | |
| "loss": 2.8538, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9806763285024157e-05, | |
| "loss": 2.8522, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9774557165861516e-05, | |
| "loss": 2.854, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9742351046698876e-05, | |
| "loss": 2.8272, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.971014492753623e-05, | |
| "loss": 2.844, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.967793880837359e-05, | |
| "loss": 2.8471, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9645732689210954e-05, | |
| "loss": 2.8302, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.961352657004831e-05, | |
| "loss": 2.8652, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.958132045088567e-05, | |
| "loss": 2.8438, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9549114331723028e-05, | |
| "loss": 2.8411, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.951690821256039e-05, | |
| "loss": 2.8372, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9484702093397747e-05, | |
| "loss": 2.8387, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9452495974235106e-05, | |
| "loss": 2.8622, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9420289855072462e-05, | |
| "loss": 2.8256, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.938808373590982e-05, | |
| "loss": 2.845, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9355877616747184e-05, | |
| "loss": 2.843, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9323671497584544e-05, | |
| "loss": 2.8388, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.92914653784219e-05, | |
| "loss": 2.8725, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.925925925925926e-05, | |
| "loss": 2.844, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9227053140096622e-05, | |
| "loss": 2.828, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.919484702093398e-05, | |
| "loss": 2.8495, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9162640901771337e-05, | |
| "loss": 2.8342, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9130434782608696e-05, | |
| "loss": 2.8519, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.909822866344606e-05, | |
| "loss": 2.8379, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9066022544283415e-05, | |
| "loss": 2.8307, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9033816425120775e-05, | |
| "loss": 2.8247, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9001610305958134e-05, | |
| "loss": 2.8353, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.896940418679549e-05, | |
| "loss": 2.8314, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.8937198067632853e-05, | |
| "loss": 2.8551, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8904991948470212e-05, | |
| "loss": 2.8397, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8872785829307568e-05, | |
| "loss": 2.8393, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8840579710144927e-05, | |
| "loss": 2.8362, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.880837359098229e-05, | |
| "loss": 2.8416, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.877616747181965e-05, | |
| "loss": 2.8438, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8743961352657005e-05, | |
| "loss": 2.8604, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8711755233494365e-05, | |
| "loss": 2.8333, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.867954911433172e-05, | |
| "loss": 2.841, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8647342995169087e-05, | |
| "loss": 2.8396, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8615136876006443e-05, | |
| "loss": 2.8359, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8582930756843802e-05, | |
| "loss": 2.8387, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8550724637681158e-05, | |
| "loss": 2.8228, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.851851851851852e-05, | |
| "loss": 2.8492, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.848631239935588e-05, | |
| "loss": 2.8357, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.845410628019324e-05, | |
| "loss": 2.8602, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8421900161030595e-05, | |
| "loss": 2.8513, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8389694041867955e-05, | |
| "loss": 2.8451, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8357487922705318e-05, | |
| "loss": 2.838, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8325281803542674e-05, | |
| "loss": 2.8168, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8293075684380033e-05, | |
| "loss": 2.8381, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.826086956521739e-05, | |
| "loss": 2.8328, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8228663446054755e-05, | |
| "loss": 2.8024, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.819645732689211e-05, | |
| "loss": 2.8387, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.816425120772947e-05, | |
| "loss": 2.8464, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8132045088566826e-05, | |
| "loss": 2.8304, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8099838969404192e-05, | |
| "loss": 2.8233, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.806763285024155e-05, | |
| "loss": 2.8343, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8035426731078908e-05, | |
| "loss": 2.835, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8003220611916264e-05, | |
| "loss": 2.8356, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.7971014492753623e-05, | |
| "loss": 2.8548, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.7938808373590986e-05, | |
| "loss": 2.8293, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7906602254428345e-05, | |
| "loss": 2.8255, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.78743961352657e-05, | |
| "loss": 2.8358, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.784219001610306e-05, | |
| "loss": 2.8539, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7809983896940423e-05, | |
| "loss": 2.8419, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 2.8279, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.774557165861514e-05, | |
| "loss": 2.8491, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7713365539452494e-05, | |
| "loss": 2.8203, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7681159420289854e-05, | |
| "loss": 2.8144, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7648953301127217e-05, | |
| "loss": 2.8371, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7616747181964576e-05, | |
| "loss": 2.8459, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7584541062801932e-05, | |
| "loss": 2.8455, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.755233494363929e-05, | |
| "loss": 2.8141, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7520128824476654e-05, | |
| "loss": 2.8354, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7487922705314013e-05, | |
| "loss": 2.8243, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.745571658615137e-05, | |
| "loss": 2.8168, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.742351046698873e-05, | |
| "loss": 2.8294, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.7391304347826085e-05, | |
| "loss": 2.8431, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.7359098228663447e-05, | |
| "loss": 2.8458, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.7326892109500807e-05, | |
| "loss": 2.8317, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.7294685990338166e-05, | |
| "loss": 2.8287, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.7262479871175522e-05, | |
| "loss": 2.8311, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.7230273752012885e-05, | |
| "loss": 2.8279, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.7198067632850244e-05, | |
| "loss": 2.8073, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.71658615136876e-05, | |
| "loss": 2.8293, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.713365539452496e-05, | |
| "loss": 2.8098, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.7101449275362322e-05, | |
| "loss": 2.8379, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.706924315619968e-05, | |
| "loss": 2.8241, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.7037037037037037e-05, | |
| "loss": 2.8216, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.7004830917874397e-05, | |
| "loss": 2.8374, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.6972624798711753e-05, | |
| "loss": 2.8052, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.694041867954912e-05, | |
| "loss": 2.843, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6908212560386475e-05, | |
| "loss": 2.8366, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6876006441223834e-05, | |
| "loss": 2.8182, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.684380032206119e-05, | |
| "loss": 2.8541, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6811594202898553e-05, | |
| "loss": 2.8235, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6779388083735912e-05, | |
| "loss": 2.8081, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.674718196457327e-05, | |
| "loss": 2.8794, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6714975845410628e-05, | |
| "loss": 2.8174, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6682769726247987e-05, | |
| "loss": 2.8363, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.665056360708535e-05, | |
| "loss": 2.8072, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6618357487922706e-05, | |
| "loss": 2.8166, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6586151368760065e-05, | |
| "loss": 2.8289, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6553945249597424e-05, | |
| "loss": 2.8295, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6521739130434787e-05, | |
| "loss": 2.8126, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6489533011272143e-05, | |
| "loss": 2.8437, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6457326892109502e-05, | |
| "loss": 2.8406, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.642512077294686e-05, | |
| "loss": 2.8443, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6392914653784218e-05, | |
| "loss": 2.8291, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.636070853462158e-05, | |
| "loss": 2.8393, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.632850241545894e-05, | |
| "loss": 2.825, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6296296296296296e-05, | |
| "loss": 2.835, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6264090177133655e-05, | |
| "loss": 2.8325, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6231884057971018e-05, | |
| "loss": 2.8117, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6199677938808377e-05, | |
| "loss": 2.8135, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6167471819645733e-05, | |
| "loss": 2.7984, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6135265700483093e-05, | |
| "loss": 2.8305, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6103059581320455e-05, | |
| "loss": 2.8003, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.607085346215781e-05, | |
| "loss": 2.7895, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.603864734299517e-05, | |
| "loss": 2.8147, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.600644122383253e-05, | |
| "loss": 2.8025, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.5974235104669886e-05, | |
| "loss": 2.8163, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.594202898550725e-05, | |
| "loss": 2.8055, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5909822866344608e-05, | |
| "loss": 2.8029, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5877616747181964e-05, | |
| "loss": 2.8047, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5845410628019323e-05, | |
| "loss": 2.8138, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5813204508856686e-05, | |
| "loss": 2.8168, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5780998389694045e-05, | |
| "loss": 2.844, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.57487922705314e-05, | |
| "loss": 2.8229, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.571658615136876e-05, | |
| "loss": 2.8018, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5684380032206117e-05, | |
| "loss": 2.8209, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5652173913043483e-05, | |
| "loss": 2.8103, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.561996779388084e-05, | |
| "loss": 2.7793, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5587761674718198e-05, | |
| "loss": 2.8128, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5555555555555554e-05, | |
| "loss": 2.8035, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5523349436392917e-05, | |
| "loss": 2.82, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5491143317230276e-05, | |
| "loss": 2.8034, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5458937198067636e-05, | |
| "loss": 2.7988, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.542673107890499e-05, | |
| "loss": 2.7987, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.539452495974235e-05, | |
| "loss": 2.8125, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.5362318840579714e-05, | |
| "loss": 2.81, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.533011272141707e-05, | |
| "loss": 2.8113, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.529790660225443e-05, | |
| "loss": 2.8195, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.5265700483091785e-05, | |
| "loss": 2.8123, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.523349436392915e-05, | |
| "loss": 2.8181, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.5201288244766507e-05, | |
| "loss": 2.7974, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.5169082125603866e-05, | |
| "loss": 2.7791, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.5136876006441222e-05, | |
| "loss": 2.7866, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.510466988727859e-05, | |
| "loss": 2.8178, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.5072463768115944e-05, | |
| "loss": 2.806, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.5040257648953304e-05, | |
| "loss": 2.7977, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.500805152979066e-05, | |
| "loss": 2.8035, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.4975845410628022e-05, | |
| "loss": 2.8055, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.494363929146538e-05, | |
| "loss": 2.8079, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.491143317230274e-05, | |
| "loss": 2.8052, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4879227053140097e-05, | |
| "loss": 2.8017, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4847020933977456e-05, | |
| "loss": 2.8018, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4814814814814816e-05, | |
| "loss": 2.8021, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4782608695652175e-05, | |
| "loss": 2.8039, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4750402576489534e-05, | |
| "loss": 2.8036, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.471819645732689e-05, | |
| "loss": 2.8052, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4685990338164253e-05, | |
| "loss": 2.7947, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.465378421900161e-05, | |
| "loss": 2.814, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4621578099838972e-05, | |
| "loss": 2.8053, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4589371980676328e-05, | |
| "loss": 2.7983, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.455716586151369e-05, | |
| "loss": 2.8219, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4524959742351047e-05, | |
| "loss": 2.8118, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.449275362318841e-05, | |
| "loss": 2.7935, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4460547504025765e-05, | |
| "loss": 2.7984, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4428341384863128e-05, | |
| "loss": 2.8212, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4396135265700484e-05, | |
| "loss": 2.8028, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4363929146537843e-05, | |
| "loss": 2.7923, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4331723027375203e-05, | |
| "loss": 2.7949, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4299516908212562e-05, | |
| "loss": 2.7843, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.426731078904992e-05, | |
| "loss": 2.8099, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.423510466988728e-05, | |
| "loss": 2.7919, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.420289855072464e-05, | |
| "loss": 2.7945, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4170692431561996e-05, | |
| "loss": 2.7856, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.413848631239936e-05, | |
| "loss": 2.8014, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4106280193236715e-05, | |
| "loss": 2.8032, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4074074074074074e-05, | |
| "loss": 2.8034, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4041867954911433e-05, | |
| "loss": 2.796, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4009661835748793e-05, | |
| "loss": 2.8212, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.3977455716586152e-05, | |
| "loss": 2.801, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.394524959742351e-05, | |
| "loss": 2.8197, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.391304347826087e-05, | |
| "loss": 2.7941, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.388083735909823e-05, | |
| "loss": 2.8036, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.384863123993559e-05, | |
| "loss": 2.8133, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.381642512077295e-05, | |
| "loss": 2.8264, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.3784219001610308e-05, | |
| "loss": 2.8203, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.3752012882447668e-05, | |
| "loss": 2.8095, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.3719806763285024e-05, | |
| "loss": 2.7942, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.3687600644122386e-05, | |
| "loss": 2.802, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.3655394524959742e-05, | |
| "loss": 2.8047, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.36231884057971e-05, | |
| "loss": 2.8333, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.359098228663446e-05, | |
| "loss": 2.7805, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.355877616747182e-05, | |
| "loss": 2.7929, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.352657004830918e-05, | |
| "loss": 2.7951, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.349436392914654e-05, | |
| "loss": 2.7878, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.34621578099839e-05, | |
| "loss": 2.8133, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.3429951690821258e-05, | |
| "loss": 2.81, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3397745571658617e-05, | |
| "loss": 2.7892, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3365539452495973e-05, | |
| "loss": 2.8081, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 2.8013, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3301127214170692e-05, | |
| "loss": 2.802, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3268921095008055e-05, | |
| "loss": 2.8049, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.323671497584541e-05, | |
| "loss": 2.8115, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3204508856682773e-05, | |
| "loss": 2.8037, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.317230273752013e-05, | |
| "loss": 2.8067, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.314009661835749e-05, | |
| "loss": 2.8075, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3107890499194848e-05, | |
| "loss": 2.8046, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3075684380032207e-05, | |
| "loss": 2.794, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3043478260869567e-05, | |
| "loss": 2.7994, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3011272141706926e-05, | |
| "loss": 2.8026, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.2979066022544285e-05, | |
| "loss": 2.7916, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.294685990338164e-05, | |
| "loss": 2.804, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.2914653784219004e-05, | |
| "loss": 2.7875, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.288244766505636e-05, | |
| "loss": 2.8017, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2850241545893723e-05, | |
| "loss": 2.7964, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.281803542673108e-05, | |
| "loss": 2.819, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.278582930756844e-05, | |
| "loss": 2.8035, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2753623188405797e-05, | |
| "loss": 2.8024, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2721417069243157e-05, | |
| "loss": 2.7984, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2689210950080516e-05, | |
| "loss": 2.8093, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2657004830917875e-05, | |
| "loss": 2.8083, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2624798711755235e-05, | |
| "loss": 2.8187, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2592592592592594e-05, | |
| "loss": 2.8028, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2560386473429953e-05, | |
| "loss": 2.7928, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2528180354267313e-05, | |
| "loss": 2.8309, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2495974235104672e-05, | |
| "loss": 2.8041, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.246376811594203e-05, | |
| "loss": 2.8062, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.243156199677939e-05, | |
| "loss": 2.7811, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2399355877616747e-05, | |
| "loss": 2.8267, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.2367149758454106e-05, | |
| "loss": 2.8071, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.2334943639291466e-05, | |
| "loss": 2.7929, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.2302737520128825e-05, | |
| "loss": 2.7799, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.2270531400966184e-05, | |
| "loss": 2.8064, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.2238325281803544e-05, | |
| "loss": 2.7858, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.2206119162640903e-05, | |
| "loss": 2.7987, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.2173913043478262e-05, | |
| "loss": 2.8083, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.214170692431562e-05, | |
| "loss": 2.7931, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.210950080515298e-05, | |
| "loss": 2.7966, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.2077294685990337e-05, | |
| "loss": 2.7905, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.20450885668277e-05, | |
| "loss": 2.7893, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.2012882447665056e-05, | |
| "loss": 2.7872, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.198067632850242e-05, | |
| "loss": 2.7914, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.1948470209339774e-05, | |
| "loss": 2.804, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.1916264090177137e-05, | |
| "loss": 2.7954, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1884057971014493e-05, | |
| "loss": 2.7865, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1851851851851852e-05, | |
| "loss": 2.791, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1819645732689212e-05, | |
| "loss": 2.7986, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.178743961352657e-05, | |
| "loss": 2.7855, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.175523349436393e-05, | |
| "loss": 2.7979, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1723027375201286e-05, | |
| "loss": 2.7887, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.169082125603865e-05, | |
| "loss": 2.8069, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1658615136876005e-05, | |
| "loss": 2.7937, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1626409017713368e-05, | |
| "loss": 2.7942, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1594202898550724e-05, | |
| "loss": 2.779, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1561996779388087e-05, | |
| "loss": 2.7915, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1529790660225443e-05, | |
| "loss": 2.7835, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1497584541062805e-05, | |
| "loss": 2.7864, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.146537842190016e-05, | |
| "loss": 2.7905, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1433172302737524e-05, | |
| "loss": 2.8186, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.140096618357488e-05, | |
| "loss": 2.7718, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.136876006441224e-05, | |
| "loss": 2.8057, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.13365539452496e-05, | |
| "loss": 2.8028, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.1304347826086958e-05, | |
| "loss": 2.8135, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.1272141706924317e-05, | |
| "loss": 2.7958, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.1239935587761677e-05, | |
| "loss": 2.8114, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.1207729468599036e-05, | |
| "loss": 2.7923, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.1175523349436392e-05, | |
| "loss": 2.8136, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.1143317230273755e-05, | |
| "loss": 2.8194, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.111111111111111e-05, | |
| "loss": 2.7926, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.107890499194847e-05, | |
| "loss": 2.7928, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.104669887278583e-05, | |
| "loss": 2.793, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.101449275362319e-05, | |
| "loss": 2.7991, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.0982286634460548e-05, | |
| "loss": 2.8141, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.0950080515297908e-05, | |
| "loss": 2.7874, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.0917874396135267e-05, | |
| "loss": 2.8107, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.0885668276972626e-05, | |
| "loss": 2.8102, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0853462157809986e-05, | |
| "loss": 2.7908, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0821256038647345e-05, | |
| "loss": 2.7978, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0789049919484704e-05, | |
| "loss": 2.7697, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0756843800322064e-05, | |
| "loss": 2.7937, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.072463768115942e-05, | |
| "loss": 2.8289, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0692431561996782e-05, | |
| "loss": 2.8001, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.066022544283414e-05, | |
| "loss": 2.7772, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0628019323671498e-05, | |
| "loss": 2.7779, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0595813204508857e-05, | |
| "loss": 2.7996, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0563607085346216e-05, | |
| "loss": 2.7866, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0531400966183576e-05, | |
| "loss": 2.7746, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0499194847020935e-05, | |
| "loss": 2.8097, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0466988727858294e-05, | |
| "loss": 2.7903, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0434782608695654e-05, | |
| "loss": 2.7794, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0402576489533013e-05, | |
| "loss": 2.7772, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.037037037037037e-05, | |
| "loss": 2.7863, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0338164251207732e-05, | |
| "loss": 2.8087, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0305958132045088e-05, | |
| "loss": 2.8106, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.027375201288245e-05, | |
| "loss": 2.7916, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0241545893719806e-05, | |
| "loss": 2.7952, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.020933977455717e-05, | |
| "loss": 2.7902, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0177133655394525e-05, | |
| "loss": 2.8145, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0144927536231885e-05, | |
| "loss": 2.7875, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0112721417069244e-05, | |
| "loss": 2.7653, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0080515297906603e-05, | |
| "loss": 2.7612, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0048309178743963e-05, | |
| "loss": 2.7735, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0016103059581322e-05, | |
| "loss": 2.7625, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.998389694041868e-05, | |
| "loss": 2.7749, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.9951690821256037e-05, | |
| "loss": 2.7534, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.99194847020934e-05, | |
| "loss": 2.7682, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.9887278582930756e-05, | |
| "loss": 2.7758, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.985507246376812e-05, | |
| "loss": 2.7695, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9822866344605475e-05, | |
| "loss": 2.7639, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9790660225442837e-05, | |
| "loss": 2.7859, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9758454106280193e-05, | |
| "loss": 2.7764, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9726247987117553e-05, | |
| "loss": 2.7729, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9694041867954912e-05, | |
| "loss": 2.7882, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.966183574879227e-05, | |
| "loss": 2.7683, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.962962962962963e-05, | |
| "loss": 2.7647, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.959742351046699e-05, | |
| "loss": 2.7779, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.956521739130435e-05, | |
| "loss": 2.748, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.953301127214171e-05, | |
| "loss": 2.7748, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9500805152979068e-05, | |
| "loss": 2.7665, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9468599033816428e-05, | |
| "loss": 2.7798, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9436392914653787e-05, | |
| "loss": 2.7687, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9404186795491143e-05, | |
| "loss": 2.7772, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9371980676328502e-05, | |
| "loss": 2.7982, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.933977455716586e-05, | |
| "loss": 2.7814, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.930756843800322e-05, | |
| "loss": 2.7493, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.927536231884058e-05, | |
| "loss": 2.7628, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.924315619967794e-05, | |
| "loss": 2.7383, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.92109500805153e-05, | |
| "loss": 2.7732, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.917874396135266e-05, | |
| "loss": 2.7874, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9146537842190018e-05, | |
| "loss": 2.7626, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9114331723027377e-05, | |
| "loss": 2.782, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9082125603864733e-05, | |
| "loss": 2.779, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9049919484702096e-05, | |
| "loss": 2.7991, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9017713365539452e-05, | |
| "loss": 2.783, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.8985507246376814e-05, | |
| "loss": 2.7851, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.895330112721417e-05, | |
| "loss": 2.8004, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.892109500805153e-05, | |
| "loss": 2.7691, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.888888888888889e-05, | |
| "loss": 2.7606, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.885668276972625e-05, | |
| "loss": 2.7685, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8824476650563608e-05, | |
| "loss": 2.7662, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8792270531400967e-05, | |
| "loss": 2.7608, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8760064412238327e-05, | |
| "loss": 2.7677, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8727858293075682e-05, | |
| "loss": 2.778, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8695652173913045e-05, | |
| "loss": 2.7742, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.86634460547504e-05, | |
| "loss": 2.7837, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8631239935587764e-05, | |
| "loss": 2.7774, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.859903381642512e-05, | |
| "loss": 2.8115, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8566827697262483e-05, | |
| "loss": 2.7943, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.853462157809984e-05, | |
| "loss": 2.804, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.85024154589372e-05, | |
| "loss": 2.7752, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8470209339774557e-05, | |
| "loss": 2.7689, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8438003220611917e-05, | |
| "loss": 2.7621, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8405797101449276e-05, | |
| "loss": 2.7741, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8373590982286635e-05, | |
| "loss": 2.7757, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8341384863123995e-05, | |
| "loss": 2.7747, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8309178743961354e-05, | |
| "loss": 2.7898, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8276972624798713e-05, | |
| "loss": 2.7775, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8244766505636073e-05, | |
| "loss": 2.7671, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8212560386473432e-05, | |
| "loss": 2.784, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8180354267310788e-05, | |
| "loss": 2.7866, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.814814814814815e-05, | |
| "loss": 2.7636, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8115942028985507e-05, | |
| "loss": 2.7888, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8083735909822866e-05, | |
| "loss": 2.7908, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8051529790660225e-05, | |
| "loss": 2.7477, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8019323671497585e-05, | |
| "loss": 2.7787, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7987117552334944e-05, | |
| "loss": 2.7733, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7954911433172304e-05, | |
| "loss": 2.7702, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7922705314009663e-05, | |
| "loss": 2.7802, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7890499194847022e-05, | |
| "loss": 2.7665, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.785829307568438e-05, | |
| "loss": 2.7842, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.782608695652174e-05, | |
| "loss": 2.8066, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.77938808373591e-05, | |
| "loss": 2.7668, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.776167471819646e-05, | |
| "loss": 2.7665, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7729468599033816e-05, | |
| "loss": 2.7883, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.769726247987118e-05, | |
| "loss": 2.7733, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7665056360708534e-05, | |
| "loss": 2.7688, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7632850241545894e-05, | |
| "loss": 2.7634, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7600644122383253e-05, | |
| "loss": 2.7543, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7568438003220612e-05, | |
| "loss": 2.7563, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7536231884057972e-05, | |
| "loss": 2.7511, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.750402576489533e-05, | |
| "loss": 2.7667, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.747181964573269e-05, | |
| "loss": 2.7703, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7439613526570046e-05, | |
| "loss": 2.7711, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.740740740740741e-05, | |
| "loss": 2.7846, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7375201288244765e-05, | |
| "loss": 2.7767, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7342995169082128e-05, | |
| "loss": 2.7751, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7310789049919484e-05, | |
| "loss": 2.76, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7278582930756847e-05, | |
| "loss": 2.7869, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7246376811594203e-05, | |
| "loss": 2.759, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7214170692431565e-05, | |
| "loss": 2.778, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.718196457326892e-05, | |
| "loss": 2.7836, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.714975845410628e-05, | |
| "loss": 2.748, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.711755233494364e-05, | |
| "loss": 2.7511, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7085346215781e-05, | |
| "loss": 2.7787, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.705314009661836e-05, | |
| "loss": 2.7732, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7020933977455718e-05, | |
| "loss": 2.7735, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.6988727858293077e-05, | |
| "loss": 2.7658, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.6956521739130433e-05, | |
| "loss": 2.7737, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.6924315619967796e-05, | |
| "loss": 2.7989, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.6892109500805152e-05, | |
| "loss": 2.7815, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.6859903381642515e-05, | |
| "loss": 2.7763, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.682769726247987e-05, | |
| "loss": 2.7728, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6795491143317233e-05, | |
| "loss": 2.7588, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.676328502415459e-05, | |
| "loss": 2.7461, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.673107890499195e-05, | |
| "loss": 2.7898, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6698872785829308e-05, | |
| "loss": 2.7885, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 2.7739, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6634460547504027e-05, | |
| "loss": 2.7645, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6602254428341386e-05, | |
| "loss": 2.7802, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6570048309178746e-05, | |
| "loss": 2.7774, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6537842190016105e-05, | |
| "loss": 2.763, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6505636070853464e-05, | |
| "loss": 2.7799, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6473429951690824e-05, | |
| "loss": 2.7713, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.644122383252818e-05, | |
| "loss": 2.775, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.640901771336554e-05, | |
| "loss": 2.7762, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6376811594202898e-05, | |
| "loss": 2.7771, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6344605475040258e-05, | |
| "loss": 2.7875, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6312399355877617e-05, | |
| "loss": 2.7568, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6280193236714976e-05, | |
| "loss": 2.7611, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6247987117552336e-05, | |
| "loss": 2.774, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6215780998389695e-05, | |
| "loss": 2.7967, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6183574879227054e-05, | |
| "loss": 2.7724, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6151368760064414e-05, | |
| "loss": 2.7948, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6119162640901773e-05, | |
| "loss": 2.7626, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.608695652173913e-05, | |
| "loss": 2.7769, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6054750402576492e-05, | |
| "loss": 2.7456, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6022544283413848e-05, | |
| "loss": 2.7788, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.599033816425121e-05, | |
| "loss": 2.7688, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.5958132045088566e-05, | |
| "loss": 2.7825, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.5925925925925926e-05, | |
| "loss": 2.7784, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.5893719806763285e-05, | |
| "loss": 2.7897, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.5861513687600644e-05, | |
| "loss": 2.7611, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5829307568438004e-05, | |
| "loss": 2.7706, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5797101449275363e-05, | |
| "loss": 2.7604, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5764895330112723e-05, | |
| "loss": 2.7562, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.573268921095008e-05, | |
| "loss": 2.7804, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.570048309178744e-05, | |
| "loss": 2.7732, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5668276972624797e-05, | |
| "loss": 2.7837, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.563607085346216e-05, | |
| "loss": 2.7764, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5603864734299516e-05, | |
| "loss": 2.7631, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.557165861513688e-05, | |
| "loss": 2.7799, | |
| "step": 107900 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5539452495974235e-05, | |
| "loss": 2.7735, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5507246376811597e-05, | |
| "loss": 2.7559, | |
| "step": 108100 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5475040257648953e-05, | |
| "loss": 2.7801, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5442834138486313e-05, | |
| "loss": 2.7813, | |
| "step": 108300 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5410628019323672e-05, | |
| "loss": 2.7771, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.537842190016103e-05, | |
| "loss": 2.7749, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.534621578099839e-05, | |
| "loss": 2.7739, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.531400966183575e-05, | |
| "loss": 2.7484, | |
| "step": 108700 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.528180354267311e-05, | |
| "loss": 2.7949, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5249597423510467e-05, | |
| "loss": 2.7832, | |
| "step": 108900 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5217391304347828e-05, | |
| "loss": 2.7844, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5185185185185186e-05, | |
| "loss": 2.7597, | |
| "step": 109100 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5152979066022547e-05, | |
| "loss": 2.7675, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5120772946859905e-05, | |
| "loss": 2.7787, | |
| "step": 109300 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5088566827697262e-05, | |
| "loss": 2.7745, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5056360708534623e-05, | |
| "loss": 2.7835, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5024154589371981e-05, | |
| "loss": 2.7636, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.499194847020934e-05, | |
| "loss": 2.7596, | |
| "step": 109700 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.49597423510467e-05, | |
| "loss": 2.7606, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.4927536231884059e-05, | |
| "loss": 2.7677, | |
| "step": 109900 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.4895330112721417e-05, | |
| "loss": 2.7842, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.4863123993558778e-05, | |
| "loss": 2.7774, | |
| "step": 110100 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4830917874396135e-05, | |
| "loss": 2.786, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4798711755233496e-05, | |
| "loss": 2.7628, | |
| "step": 110300 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4766505636070854e-05, | |
| "loss": 2.7699, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4734299516908212e-05, | |
| "loss": 2.7805, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4702093397745573e-05, | |
| "loss": 2.757, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.466988727858293e-05, | |
| "loss": 2.768, | |
| "step": 110700 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4637681159420291e-05, | |
| "loss": 2.7504, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4605475040257649e-05, | |
| "loss": 2.7523, | |
| "step": 110900 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.457326892109501e-05, | |
| "loss": 2.7558, | |
| "step": 111000 | |
| } | |
| ], | |
| "max_steps": 156250, | |
| "num_train_epochs": 1, | |
| "total_flos": 3.0315746129075896e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |