{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 12972, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.70891150169596e-05, "grad_norm": 8.23936653137207, "learning_rate": 2.5641025641025643e-08, "loss": 1.5914, "step": 1 }, { "epoch": 0.0001541782300339192, "grad_norm": 8.8346586227417, "learning_rate": 5.1282051282051286e-08, "loss": 1.5383, "step": 2 }, { "epoch": 0.00023126734505087883, "grad_norm": 10.278841972351074, "learning_rate": 7.692307692307694e-08, "loss": 1.6608, "step": 3 }, { "epoch": 0.0003083564600678384, "grad_norm": 11.908944129943848, "learning_rate": 1.0256410256410257e-07, "loss": 1.6491, "step": 4 }, { "epoch": 0.000385445575084798, "grad_norm": 8.718999862670898, "learning_rate": 1.282051282051282e-07, "loss": 1.5346, "step": 5 }, { "epoch": 0.00046253469010175765, "grad_norm": 8.059788703918457, "learning_rate": 1.5384615384615387e-07, "loss": 1.53, "step": 6 }, { "epoch": 0.0005396238051187173, "grad_norm": 9.216470718383789, "learning_rate": 1.7948717948717948e-07, "loss": 1.5931, "step": 7 }, { "epoch": 0.0006167129201356768, "grad_norm": 9.295415878295898, "learning_rate": 2.0512820512820514e-07, "loss": 1.5061, "step": 8 }, { "epoch": 0.0006938020351526364, "grad_norm": 7.587814807891846, "learning_rate": 2.307692307692308e-07, "loss": 1.5034, "step": 9 }, { "epoch": 0.000770891150169596, "grad_norm": 10.28200912475586, "learning_rate": 2.564102564102564e-07, "loss": 1.5683, "step": 10 }, { "epoch": 0.0008479802651865557, "grad_norm": 10.186943054199219, "learning_rate": 2.820512820512821e-07, "loss": 1.612, "step": 11 }, { "epoch": 0.0009250693802035153, "grad_norm": 8.96688175201416, "learning_rate": 3.0769230769230774e-07, "loss": 1.5478, "step": 12 }, { "epoch": 0.0010021584952204748, "grad_norm": 8.598114013671875, "learning_rate": 3.3333333333333335e-07, "loss": 1.77, "step": 13 }, { "epoch": 0.0010792476102374346, "grad_norm": 8.468049049377441, "learning_rate": 3.5897435897435896e-07, "loss": 1.5732, "step": 14 }, { "epoch": 0.001156336725254394, "grad_norm": 8.353447914123535, "learning_rate": 3.846153846153847e-07, "loss": 1.6234, "step": 15 }, { "epoch": 0.0012334258402713536, "grad_norm": 8.753851890563965, "learning_rate": 4.102564102564103e-07, "loss": 1.6226, "step": 16 }, { "epoch": 0.0013105149552883133, "grad_norm": 7.985566139221191, "learning_rate": 4.358974358974359e-07, "loss": 1.5986, "step": 17 }, { "epoch": 0.0013876040703052729, "grad_norm": 9.336605072021484, "learning_rate": 4.615384615384616e-07, "loss": 1.4641, "step": 18 }, { "epoch": 0.0014646931853222326, "grad_norm": 8.00399398803711, "learning_rate": 4.871794871794872e-07, "loss": 1.5511, "step": 19 }, { "epoch": 0.001541782300339192, "grad_norm": 8.389213562011719, "learning_rate": 5.128205128205128e-07, "loss": 1.5059, "step": 20 }, { "epoch": 0.0016188714153561516, "grad_norm": 14.020890235900879, "learning_rate": 5.384615384615386e-07, "loss": 1.7145, "step": 21 }, { "epoch": 0.0016959605303731114, "grad_norm": 6.704775810241699, "learning_rate": 5.641025641025642e-07, "loss": 1.5278, "step": 22 }, { "epoch": 0.0017730496453900709, "grad_norm": 6.921572208404541, "learning_rate": 5.897435897435898e-07, "loss": 1.502, "step": 23 }, { "epoch": 0.0018501387604070306, "grad_norm": 11.839808464050293, "learning_rate": 6.153846153846155e-07, "loss": 1.4523, "step": 24 }, { "epoch": 0.0019272278754239901, "grad_norm": 7.50053071975708, 
"learning_rate": 6.41025641025641e-07, "loss": 1.4585, "step": 25 }, { "epoch": 0.0020043169904409497, "grad_norm": 8.217403411865234, "learning_rate": 6.666666666666667e-07, "loss": 1.5492, "step": 26 }, { "epoch": 0.0020814061054579094, "grad_norm": 7.217784881591797, "learning_rate": 6.923076923076924e-07, "loss": 1.424, "step": 27 }, { "epoch": 0.002158495220474869, "grad_norm": 7.2611799240112305, "learning_rate": 7.179487179487179e-07, "loss": 1.4658, "step": 28 }, { "epoch": 0.0022355843354918284, "grad_norm": 6.959589958190918, "learning_rate": 7.435897435897436e-07, "loss": 1.4955, "step": 29 }, { "epoch": 0.002312673450508788, "grad_norm": 8.893731117248535, "learning_rate": 7.692307692307694e-07, "loss": 1.3793, "step": 30 }, { "epoch": 0.002389762565525748, "grad_norm": 6.077394485473633, "learning_rate": 7.948717948717949e-07, "loss": 1.4154, "step": 31 }, { "epoch": 0.002466851680542707, "grad_norm": 6.705117225646973, "learning_rate": 8.205128205128206e-07, "loss": 1.4288, "step": 32 }, { "epoch": 0.002543940795559667, "grad_norm": 6.546511173248291, "learning_rate": 8.461538461538463e-07, "loss": 1.3884, "step": 33 }, { "epoch": 0.0026210299105766267, "grad_norm": 6.519981384277344, "learning_rate": 8.717948717948718e-07, "loss": 1.3881, "step": 34 }, { "epoch": 0.002698119025593586, "grad_norm": 7.000728130340576, "learning_rate": 8.974358974358975e-07, "loss": 1.4235, "step": 35 }, { "epoch": 0.0027752081406105457, "grad_norm": 6.453318119049072, "learning_rate": 9.230769230769232e-07, "loss": 1.4284, "step": 36 }, { "epoch": 0.0028522972556275054, "grad_norm": 5.742129802703857, "learning_rate": 9.487179487179487e-07, "loss": 1.4163, "step": 37 }, { "epoch": 0.002929386370644465, "grad_norm": 5.8094916343688965, "learning_rate": 9.743589743589745e-07, "loss": 1.293, "step": 38 }, { "epoch": 0.0030064754856614245, "grad_norm": 6.008228302001953, "learning_rate": 1.0000000000000002e-06, "loss": 1.4182, "step": 39 }, { "epoch": 0.003083564600678384, "grad_norm": 5.845043659210205, "learning_rate": 1.0256410256410257e-06, "loss": 1.444, "step": 40 }, { "epoch": 0.003160653715695344, "grad_norm": 5.4758806228637695, "learning_rate": 1.0512820512820514e-06, "loss": 1.396, "step": 41 }, { "epoch": 0.0032377428307123032, "grad_norm": 5.759223937988281, "learning_rate": 1.076923076923077e-06, "loss": 1.3574, "step": 42 }, { "epoch": 0.003314831945729263, "grad_norm": 6.016756534576416, "learning_rate": 1.1025641025641026e-06, "loss": 1.3853, "step": 43 }, { "epoch": 0.0033919210607462227, "grad_norm": 6.545979976654053, "learning_rate": 1.1282051282051283e-06, "loss": 1.492, "step": 44 }, { "epoch": 0.003469010175763182, "grad_norm": 5.281697750091553, "learning_rate": 1.153846153846154e-06, "loss": 1.3644, "step": 45 }, { "epoch": 0.0035460992907801418, "grad_norm": 5.430404186248779, "learning_rate": 1.1794871794871795e-06, "loss": 1.3613, "step": 46 }, { "epoch": 0.0036231884057971015, "grad_norm": 5.730790138244629, "learning_rate": 1.2051282051282053e-06, "loss": 1.349, "step": 47 }, { "epoch": 0.0037002775208140612, "grad_norm": 7.0034098625183105, "learning_rate": 1.230769230769231e-06, "loss": 1.4167, "step": 48 }, { "epoch": 0.0037773666358310205, "grad_norm": 5.198720932006836, "learning_rate": 1.2564102564102565e-06, "loss": 1.2908, "step": 49 }, { "epoch": 0.0038544557508479803, "grad_norm": 5.8838605880737305, "learning_rate": 1.282051282051282e-06, "loss": 1.4535, "step": 50 }, { "epoch": 0.00393154486586494, "grad_norm": 5.737644672393799, "learning_rate": 
1.307692307692308e-06, "loss": 1.2674, "step": 51 }, { "epoch": 0.004008633980881899, "grad_norm": 5.439259052276611, "learning_rate": 1.3333333333333334e-06, "loss": 1.327, "step": 52 }, { "epoch": 0.0040857230958988595, "grad_norm": 5.283633708953857, "learning_rate": 1.358974358974359e-06, "loss": 1.2557, "step": 53 }, { "epoch": 0.004162812210915819, "grad_norm": 5.148982524871826, "learning_rate": 1.3846153846153848e-06, "loss": 1.3224, "step": 54 }, { "epoch": 0.004239901325932778, "grad_norm": 5.781235694885254, "learning_rate": 1.4102564102564104e-06, "loss": 1.3519, "step": 55 }, { "epoch": 0.004316990440949738, "grad_norm": 5.436124801635742, "learning_rate": 1.4358974358974359e-06, "loss": 1.2495, "step": 56 }, { "epoch": 0.0043940795559666975, "grad_norm": 5.363502025604248, "learning_rate": 1.4615384615384618e-06, "loss": 1.304, "step": 57 }, { "epoch": 0.004471168670983657, "grad_norm": 5.214340686798096, "learning_rate": 1.4871794871794873e-06, "loss": 1.1908, "step": 58 }, { "epoch": 0.004548257786000617, "grad_norm": 5.095510959625244, "learning_rate": 1.5128205128205128e-06, "loss": 1.3102, "step": 59 }, { "epoch": 0.004625346901017576, "grad_norm": 4.8631157875061035, "learning_rate": 1.5384615384615387e-06, "loss": 1.3529, "step": 60 }, { "epoch": 0.004702436016034536, "grad_norm": 4.890917778015137, "learning_rate": 1.5641025641025642e-06, "loss": 1.1276, "step": 61 }, { "epoch": 0.004779525131051496, "grad_norm": 5.280998706817627, "learning_rate": 1.5897435897435897e-06, "loss": 1.2194, "step": 62 }, { "epoch": 0.004856614246068455, "grad_norm": 5.293563365936279, "learning_rate": 1.6153846153846157e-06, "loss": 1.319, "step": 63 }, { "epoch": 0.004933703361085414, "grad_norm": 5.016965866088867, "learning_rate": 1.6410256410256412e-06, "loss": 1.3868, "step": 64 }, { "epoch": 0.0050107924761023746, "grad_norm": 4.839664936065674, "learning_rate": 1.6666666666666667e-06, "loss": 1.2705, "step": 65 }, { "epoch": 0.005087881591119334, "grad_norm": 4.863608360290527, "learning_rate": 1.6923076923076926e-06, "loss": 1.2731, "step": 66 }, { "epoch": 0.005164970706136293, "grad_norm": 5.9279656410217285, "learning_rate": 1.717948717948718e-06, "loss": 1.324, "step": 67 }, { "epoch": 0.005242059821153253, "grad_norm": 4.750378608703613, "learning_rate": 1.7435897435897436e-06, "loss": 1.2538, "step": 68 }, { "epoch": 0.005319148936170213, "grad_norm": 5.045099258422852, "learning_rate": 1.7692307692307695e-06, "loss": 1.2533, "step": 69 }, { "epoch": 0.005396238051187172, "grad_norm": 4.8463006019592285, "learning_rate": 1.794871794871795e-06, "loss": 1.3231, "step": 70 }, { "epoch": 0.005473327166204132, "grad_norm": 4.522879600524902, "learning_rate": 1.8205128205128205e-06, "loss": 1.3319, "step": 71 }, { "epoch": 0.005550416281221091, "grad_norm": 5.500392913818359, "learning_rate": 1.8461538461538465e-06, "loss": 1.2909, "step": 72 }, { "epoch": 0.005627505396238052, "grad_norm": 4.754296779632568, "learning_rate": 1.871794871794872e-06, "loss": 1.2207, "step": 73 }, { "epoch": 0.005704594511255011, "grad_norm": 4.711799144744873, "learning_rate": 1.8974358974358975e-06, "loss": 1.2839, "step": 74 }, { "epoch": 0.00578168362627197, "grad_norm": 5.301671504974365, "learning_rate": 1.9230769230769234e-06, "loss": 1.3236, "step": 75 }, { "epoch": 0.00585877274128893, "grad_norm": 4.835858345031738, "learning_rate": 1.948717948717949e-06, "loss": 1.2828, "step": 76 }, { "epoch": 0.00593586185630589, "grad_norm": 5.167266368865967, "learning_rate": 1.9743589743589744e-06, 
"loss": 1.3363, "step": 77 }, { "epoch": 0.006012950971322849, "grad_norm": 5.193398952484131, "learning_rate": 2.0000000000000003e-06, "loss": 1.2662, "step": 78 }, { "epoch": 0.006090040086339809, "grad_norm": 4.928783416748047, "learning_rate": 2.025641025641026e-06, "loss": 1.2551, "step": 79 }, { "epoch": 0.006167129201356768, "grad_norm": 5.207440376281738, "learning_rate": 2.0512820512820513e-06, "loss": 1.4568, "step": 80 }, { "epoch": 0.006244218316373728, "grad_norm": 5.189006805419922, "learning_rate": 2.0769230769230773e-06, "loss": 1.1864, "step": 81 }, { "epoch": 0.006321307431390688, "grad_norm": 5.419140338897705, "learning_rate": 2.1025641025641028e-06, "loss": 1.3153, "step": 82 }, { "epoch": 0.006398396546407647, "grad_norm": 4.961038589477539, "learning_rate": 2.1282051282051283e-06, "loss": 1.2585, "step": 83 }, { "epoch": 0.0064754856614246065, "grad_norm": 4.714632034301758, "learning_rate": 2.153846153846154e-06, "loss": 1.2694, "step": 84 }, { "epoch": 0.006552574776441567, "grad_norm": 5.331617832183838, "learning_rate": 2.1794871794871797e-06, "loss": 1.3765, "step": 85 }, { "epoch": 0.006629663891458526, "grad_norm": 4.9930644035339355, "learning_rate": 2.2051282051282052e-06, "loss": 1.3456, "step": 86 }, { "epoch": 0.006706753006475485, "grad_norm": 4.928962230682373, "learning_rate": 2.230769230769231e-06, "loss": 1.2938, "step": 87 }, { "epoch": 0.0067838421214924454, "grad_norm": 5.059289932250977, "learning_rate": 2.2564102564102566e-06, "loss": 1.2987, "step": 88 }, { "epoch": 0.006860931236509405, "grad_norm": 4.943025588989258, "learning_rate": 2.282051282051282e-06, "loss": 1.2336, "step": 89 }, { "epoch": 0.006938020351526364, "grad_norm": 5.614814758300781, "learning_rate": 2.307692307692308e-06, "loss": 1.1704, "step": 90 }, { "epoch": 0.007015109466543324, "grad_norm": 4.728719234466553, "learning_rate": 2.3333333333333336e-06, "loss": 1.301, "step": 91 }, { "epoch": 0.0070921985815602835, "grad_norm": 5.109025478363037, "learning_rate": 2.358974358974359e-06, "loss": 1.3068, "step": 92 }, { "epoch": 0.007169287696577244, "grad_norm": 5.220505237579346, "learning_rate": 2.384615384615385e-06, "loss": 1.3854, "step": 93 }, { "epoch": 0.007246376811594203, "grad_norm": 4.759350776672363, "learning_rate": 2.4102564102564105e-06, "loss": 1.2805, "step": 94 }, { "epoch": 0.007323465926611162, "grad_norm": 5.338301181793213, "learning_rate": 2.435897435897436e-06, "loss": 1.2815, "step": 95 }, { "epoch": 0.0074005550416281225, "grad_norm": 5.214847087860107, "learning_rate": 2.461538461538462e-06, "loss": 1.1316, "step": 96 }, { "epoch": 0.007477644156645082, "grad_norm": 4.639960289001465, "learning_rate": 2.4871794871794875e-06, "loss": 1.2396, "step": 97 }, { "epoch": 0.007554733271662041, "grad_norm": 4.69920015335083, "learning_rate": 2.512820512820513e-06, "loss": 1.2721, "step": 98 }, { "epoch": 0.007631822386679001, "grad_norm": 4.799991607666016, "learning_rate": 2.5384615384615385e-06, "loss": 1.3245, "step": 99 }, { "epoch": 0.0077089115016959605, "grad_norm": 4.986448764801025, "learning_rate": 2.564102564102564e-06, "loss": 1.2549, "step": 100 }, { "epoch": 0.00778600061671292, "grad_norm": 4.512473106384277, "learning_rate": 2.5897435897435903e-06, "loss": 1.2412, "step": 101 }, { "epoch": 0.00786308973172988, "grad_norm": 5.038204669952393, "learning_rate": 2.615384615384616e-06, "loss": 1.3146, "step": 102 }, { "epoch": 0.00794017884674684, "grad_norm": 5.079225540161133, "learning_rate": 2.6410256410256413e-06, "loss": 1.3502, "step": 
103 }, { "epoch": 0.008017267961763799, "grad_norm": 4.596055507659912, "learning_rate": 2.666666666666667e-06, "loss": 1.2562, "step": 104 }, { "epoch": 0.008094357076780759, "grad_norm": 4.764355659484863, "learning_rate": 2.6923076923076923e-06, "loss": 1.2504, "step": 105 }, { "epoch": 0.008171446191797719, "grad_norm": 4.672957897186279, "learning_rate": 2.717948717948718e-06, "loss": 1.3247, "step": 106 }, { "epoch": 0.008248535306814677, "grad_norm": 5.352134704589844, "learning_rate": 2.743589743589744e-06, "loss": 1.2011, "step": 107 }, { "epoch": 0.008325624421831638, "grad_norm": 5.508421897888184, "learning_rate": 2.7692307692307697e-06, "loss": 1.275, "step": 108 }, { "epoch": 0.008402713536848598, "grad_norm": 4.892576694488525, "learning_rate": 2.794871794871795e-06, "loss": 1.2882, "step": 109 }, { "epoch": 0.008479802651865556, "grad_norm": 5.162463665008545, "learning_rate": 2.8205128205128207e-06, "loss": 1.2715, "step": 110 }, { "epoch": 0.008556891766882516, "grad_norm": 4.8386549949646, "learning_rate": 2.846153846153846e-06, "loss": 1.2729, "step": 111 }, { "epoch": 0.008633980881899476, "grad_norm": 4.82133150100708, "learning_rate": 2.8717948717948717e-06, "loss": 1.2241, "step": 112 }, { "epoch": 0.008711069996916435, "grad_norm": 4.835055828094482, "learning_rate": 2.897435897435898e-06, "loss": 1.1711, "step": 113 }, { "epoch": 0.008788159111933395, "grad_norm": 5.417538642883301, "learning_rate": 2.9230769230769236e-06, "loss": 1.1772, "step": 114 }, { "epoch": 0.008865248226950355, "grad_norm": 5.100035190582275, "learning_rate": 2.948717948717949e-06, "loss": 1.2341, "step": 115 }, { "epoch": 0.008942337341967314, "grad_norm": 4.800838470458984, "learning_rate": 2.9743589743589746e-06, "loss": 1.2045, "step": 116 }, { "epoch": 0.009019426456984274, "grad_norm": 4.5708184242248535, "learning_rate": 3e-06, "loss": 1.1864, "step": 117 }, { "epoch": 0.009096515572001234, "grad_norm": 5.127934455871582, "learning_rate": 3.0256410256410256e-06, "loss": 1.293, "step": 118 }, { "epoch": 0.009173604687018192, "grad_norm": 4.894773960113525, "learning_rate": 3.051282051282052e-06, "loss": 1.2704, "step": 119 }, { "epoch": 0.009250693802035153, "grad_norm": 5.011133193969727, "learning_rate": 3.0769230769230774e-06, "loss": 1.3439, "step": 120 }, { "epoch": 0.009327782917052113, "grad_norm": 4.952587127685547, "learning_rate": 3.102564102564103e-06, "loss": 1.2353, "step": 121 }, { "epoch": 0.009404872032069071, "grad_norm": 4.612563610076904, "learning_rate": 3.1282051282051284e-06, "loss": 1.2477, "step": 122 }, { "epoch": 0.009481961147086031, "grad_norm": 4.67457389831543, "learning_rate": 3.153846153846154e-06, "loss": 1.3014, "step": 123 }, { "epoch": 0.009559050262102992, "grad_norm": 4.838489532470703, "learning_rate": 3.1794871794871795e-06, "loss": 1.2135, "step": 124 }, { "epoch": 0.00963613937711995, "grad_norm": 5.4575042724609375, "learning_rate": 3.205128205128206e-06, "loss": 1.3339, "step": 125 }, { "epoch": 0.00971322849213691, "grad_norm": 4.730915069580078, "learning_rate": 3.2307692307692313e-06, "loss": 1.2298, "step": 126 }, { "epoch": 0.00979031760715387, "grad_norm": 5.235102653503418, "learning_rate": 3.256410256410257e-06, "loss": 1.195, "step": 127 }, { "epoch": 0.009867406722170829, "grad_norm": 4.515792369842529, "learning_rate": 3.2820512820512823e-06, "loss": 1.2009, "step": 128 }, { "epoch": 0.009944495837187789, "grad_norm": 4.882987022399902, "learning_rate": 3.307692307692308e-06, "loss": 1.3124, "step": 129 }, { "epoch": 
0.010021584952204749, "grad_norm": 4.703249931335449, "learning_rate": 3.3333333333333333e-06, "loss": 1.2973, "step": 130 }, { "epoch": 0.010098674067221708, "grad_norm": 5.116337776184082, "learning_rate": 3.358974358974359e-06, "loss": 1.3368, "step": 131 }, { "epoch": 0.010175763182238668, "grad_norm": 4.505654335021973, "learning_rate": 3.384615384615385e-06, "loss": 1.12, "step": 132 }, { "epoch": 0.010252852297255628, "grad_norm": 4.808957576751709, "learning_rate": 3.4102564102564107e-06, "loss": 1.3359, "step": 133 }, { "epoch": 0.010329941412272586, "grad_norm": 5.045513153076172, "learning_rate": 3.435897435897436e-06, "loss": 1.2173, "step": 134 }, { "epoch": 0.010407030527289547, "grad_norm": 4.800675868988037, "learning_rate": 3.4615384615384617e-06, "loss": 1.3637, "step": 135 }, { "epoch": 0.010484119642306507, "grad_norm": 4.922788619995117, "learning_rate": 3.487179487179487e-06, "loss": 1.2737, "step": 136 }, { "epoch": 0.010561208757323465, "grad_norm": 5.645283222198486, "learning_rate": 3.5128205128205127e-06, "loss": 1.3652, "step": 137 }, { "epoch": 0.010638297872340425, "grad_norm": 5.270390510559082, "learning_rate": 3.538461538461539e-06, "loss": 1.2676, "step": 138 }, { "epoch": 0.010715386987357385, "grad_norm": 5.082615375518799, "learning_rate": 3.5641025641025646e-06, "loss": 1.2817, "step": 139 }, { "epoch": 0.010792476102374344, "grad_norm": 4.738155841827393, "learning_rate": 3.58974358974359e-06, "loss": 1.2964, "step": 140 }, { "epoch": 0.010869565217391304, "grad_norm": 4.902276992797852, "learning_rate": 3.6153846153846156e-06, "loss": 1.143, "step": 141 }, { "epoch": 0.010946654332408264, "grad_norm": 4.572447776794434, "learning_rate": 3.641025641025641e-06, "loss": 1.2367, "step": 142 }, { "epoch": 0.011023743447425224, "grad_norm": 5.859162330627441, "learning_rate": 3.6666666666666666e-06, "loss": 1.2027, "step": 143 }, { "epoch": 0.011100832562442183, "grad_norm": 5.429625034332275, "learning_rate": 3.692307692307693e-06, "loss": 1.3681, "step": 144 }, { "epoch": 0.011177921677459143, "grad_norm": 4.913431167602539, "learning_rate": 3.7179487179487184e-06, "loss": 1.1966, "step": 145 }, { "epoch": 0.011255010792476103, "grad_norm": 5.548055648803711, "learning_rate": 3.743589743589744e-06, "loss": 1.1941, "step": 146 }, { "epoch": 0.011332099907493062, "grad_norm": 4.827651500701904, "learning_rate": 3.7692307692307694e-06, "loss": 1.27, "step": 147 }, { "epoch": 0.011409189022510022, "grad_norm": 4.984038829803467, "learning_rate": 3.794871794871795e-06, "loss": 1.2527, "step": 148 }, { "epoch": 0.011486278137526982, "grad_norm": 4.7654032707214355, "learning_rate": 3.8205128205128204e-06, "loss": 1.1242, "step": 149 }, { "epoch": 0.01156336725254394, "grad_norm": 4.45728874206543, "learning_rate": 3.846153846153847e-06, "loss": 1.2219, "step": 150 }, { "epoch": 0.0116404563675609, "grad_norm": 4.937605381011963, "learning_rate": 3.871794871794872e-06, "loss": 1.2036, "step": 151 }, { "epoch": 0.01171754548257786, "grad_norm": 5.271725654602051, "learning_rate": 3.897435897435898e-06, "loss": 1.3552, "step": 152 }, { "epoch": 0.01179463459759482, "grad_norm": 4.775473117828369, "learning_rate": 3.923076923076923e-06, "loss": 1.1872, "step": 153 }, { "epoch": 0.01187172371261178, "grad_norm": 5.3986358642578125, "learning_rate": 3.948717948717949e-06, "loss": 1.3389, "step": 154 }, { "epoch": 0.01194881282762874, "grad_norm": 4.677184581756592, "learning_rate": 3.974358974358974e-06, "loss": 1.2231, "step": 155 }, { "epoch": 
0.012025901942645698, "grad_norm": 5.348373889923096, "learning_rate": 4.000000000000001e-06, "loss": 1.2328, "step": 156 }, { "epoch": 0.012102991057662658, "grad_norm": 4.47370719909668, "learning_rate": 4.025641025641026e-06, "loss": 1.215, "step": 157 }, { "epoch": 0.012180080172679618, "grad_norm": 5.143750190734863, "learning_rate": 4.051282051282052e-06, "loss": 1.2161, "step": 158 }, { "epoch": 0.012257169287696577, "grad_norm": 5.946848392486572, "learning_rate": 4.076923076923077e-06, "loss": 1.1238, "step": 159 }, { "epoch": 0.012334258402713537, "grad_norm": 4.650530815124512, "learning_rate": 4.102564102564103e-06, "loss": 1.2754, "step": 160 }, { "epoch": 0.012411347517730497, "grad_norm": 4.534331798553467, "learning_rate": 4.128205128205128e-06, "loss": 1.2325, "step": 161 }, { "epoch": 0.012488436632747455, "grad_norm": 5.198877334594727, "learning_rate": 4.1538461538461545e-06, "loss": 1.2318, "step": 162 }, { "epoch": 0.012565525747764416, "grad_norm": 4.763823986053467, "learning_rate": 4.17948717948718e-06, "loss": 1.2049, "step": 163 }, { "epoch": 0.012642614862781376, "grad_norm": 5.088624000549316, "learning_rate": 4.2051282051282055e-06, "loss": 1.2649, "step": 164 }, { "epoch": 0.012719703977798334, "grad_norm": 5.234319686889648, "learning_rate": 4.230769230769231e-06, "loss": 1.308, "step": 165 }, { "epoch": 0.012796793092815294, "grad_norm": 4.7018537521362305, "learning_rate": 4.2564102564102566e-06, "loss": 1.2356, "step": 166 }, { "epoch": 0.012873882207832255, "grad_norm": 5.618417263031006, "learning_rate": 4.282051282051282e-06, "loss": 1.1963, "step": 167 }, { "epoch": 0.012950971322849213, "grad_norm": 5.07379674911499, "learning_rate": 4.307692307692308e-06, "loss": 1.2074, "step": 168 }, { "epoch": 0.013028060437866173, "grad_norm": 4.746245861053467, "learning_rate": 4.333333333333334e-06, "loss": 1.302, "step": 169 }, { "epoch": 0.013105149552883133, "grad_norm": 4.660124778747559, "learning_rate": 4.358974358974359e-06, "loss": 1.2835, "step": 170 }, { "epoch": 0.013182238667900092, "grad_norm": 4.880448818206787, "learning_rate": 4.384615384615385e-06, "loss": 1.3333, "step": 171 }, { "epoch": 0.013259327782917052, "grad_norm": 5.0500359535217285, "learning_rate": 4.4102564102564104e-06, "loss": 1.2318, "step": 172 }, { "epoch": 0.013336416897934012, "grad_norm": 4.640452861785889, "learning_rate": 4.435897435897436e-06, "loss": 1.1425, "step": 173 }, { "epoch": 0.01341350601295097, "grad_norm": 4.400023460388184, "learning_rate": 4.461538461538462e-06, "loss": 1.319, "step": 174 }, { "epoch": 0.01349059512796793, "grad_norm": 4.9543046951293945, "learning_rate": 4.487179487179488e-06, "loss": 1.1423, "step": 175 }, { "epoch": 0.013567684242984891, "grad_norm": 5.030699729919434, "learning_rate": 4.512820512820513e-06, "loss": 1.2126, "step": 176 }, { "epoch": 0.01364477335800185, "grad_norm": 5.696242809295654, "learning_rate": 4.538461538461539e-06, "loss": 1.4041, "step": 177 }, { "epoch": 0.01372186247301881, "grad_norm": 5.10263729095459, "learning_rate": 4.564102564102564e-06, "loss": 1.2331, "step": 178 }, { "epoch": 0.01379895158803577, "grad_norm": 4.404088497161865, "learning_rate": 4.58974358974359e-06, "loss": 1.2334, "step": 179 }, { "epoch": 0.013876040703052728, "grad_norm": 4.599002361297607, "learning_rate": 4.615384615384616e-06, "loss": 1.0995, "step": 180 }, { "epoch": 0.013953129818069688, "grad_norm": 5.312249660491943, "learning_rate": 4.641025641025642e-06, "loss": 1.3024, "step": 181 }, { "epoch": 0.014030218933086648, 
"grad_norm": 4.873595714569092, "learning_rate": 4.666666666666667e-06, "loss": 1.3503, "step": 182 }, { "epoch": 0.014107308048103609, "grad_norm": 4.9252400398254395, "learning_rate": 4.692307692307693e-06, "loss": 1.2383, "step": 183 }, { "epoch": 0.014184397163120567, "grad_norm": 5.004896640777588, "learning_rate": 4.717948717948718e-06, "loss": 1.1963, "step": 184 }, { "epoch": 0.014261486278137527, "grad_norm": 5.2026801109313965, "learning_rate": 4.743589743589744e-06, "loss": 1.4058, "step": 185 }, { "epoch": 0.014338575393154487, "grad_norm": 4.740382194519043, "learning_rate": 4.76923076923077e-06, "loss": 1.2259, "step": 186 }, { "epoch": 0.014415664508171446, "grad_norm": 4.338659763336182, "learning_rate": 4.7948717948717955e-06, "loss": 1.0815, "step": 187 }, { "epoch": 0.014492753623188406, "grad_norm": 5.065215110778809, "learning_rate": 4.820512820512821e-06, "loss": 1.2062, "step": 188 }, { "epoch": 0.014569842738205366, "grad_norm": 4.5627264976501465, "learning_rate": 4.8461538461538465e-06, "loss": 1.2032, "step": 189 }, { "epoch": 0.014646931853222325, "grad_norm": 5.128222465515137, "learning_rate": 4.871794871794872e-06, "loss": 1.3514, "step": 190 }, { "epoch": 0.014724020968239285, "grad_norm": 4.774002552032471, "learning_rate": 4.8974358974358975e-06, "loss": 1.2028, "step": 191 }, { "epoch": 0.014801110083256245, "grad_norm": 5.117179870605469, "learning_rate": 4.923076923076924e-06, "loss": 1.19, "step": 192 }, { "epoch": 0.014878199198273203, "grad_norm": 4.972648620605469, "learning_rate": 4.948717948717949e-06, "loss": 1.2141, "step": 193 }, { "epoch": 0.014955288313290164, "grad_norm": 4.6805219650268555, "learning_rate": 4.974358974358975e-06, "loss": 1.1642, "step": 194 }, { "epoch": 0.015032377428307124, "grad_norm": 4.577054023742676, "learning_rate": 5e-06, "loss": 1.28, "step": 195 }, { "epoch": 0.015109466543324082, "grad_norm": 4.717634677886963, "learning_rate": 5.025641025641026e-06, "loss": 1.2222, "step": 196 }, { "epoch": 0.015186555658341042, "grad_norm": 5.113860607147217, "learning_rate": 5.051282051282051e-06, "loss": 1.3246, "step": 197 }, { "epoch": 0.015263644773358002, "grad_norm": 5.178426742553711, "learning_rate": 5.076923076923077e-06, "loss": 1.2102, "step": 198 }, { "epoch": 0.015340733888374961, "grad_norm": 5.071588039398193, "learning_rate": 5.1025641025641024e-06, "loss": 1.2794, "step": 199 }, { "epoch": 0.015417823003391921, "grad_norm": 5.163846015930176, "learning_rate": 5.128205128205128e-06, "loss": 1.2664, "step": 200 }, { "epoch": 0.015494912118408881, "grad_norm": 4.933408260345459, "learning_rate": 5.1538461538461534e-06, "loss": 1.3126, "step": 201 }, { "epoch": 0.01557200123342584, "grad_norm": 5.551726341247559, "learning_rate": 5.179487179487181e-06, "loss": 1.4489, "step": 202 }, { "epoch": 0.0156490903484428, "grad_norm": 4.847557067871094, "learning_rate": 5.205128205128206e-06, "loss": 1.2887, "step": 203 }, { "epoch": 0.01572617946345976, "grad_norm": 5.323663234710693, "learning_rate": 5.230769230769232e-06, "loss": 1.271, "step": 204 }, { "epoch": 0.01580326857847672, "grad_norm": 4.9948201179504395, "learning_rate": 5.256410256410257e-06, "loss": 1.1802, "step": 205 }, { "epoch": 0.01588035769349368, "grad_norm": 5.172011375427246, "learning_rate": 5.282051282051283e-06, "loss": 1.1817, "step": 206 }, { "epoch": 0.015957446808510637, "grad_norm": 5.043161869049072, "learning_rate": 5.307692307692308e-06, "loss": 1.2308, "step": 207 }, { "epoch": 0.016034535923527597, "grad_norm": 4.964840888977051, 
"learning_rate": 5.333333333333334e-06, "loss": 1.314, "step": 208 }, { "epoch": 0.016111625038544557, "grad_norm": 5.201618194580078, "learning_rate": 5.358974358974359e-06, "loss": 1.2482, "step": 209 }, { "epoch": 0.016188714153561518, "grad_norm": 5.381608963012695, "learning_rate": 5.384615384615385e-06, "loss": 1.2502, "step": 210 }, { "epoch": 0.016265803268578478, "grad_norm": 5.2073187828063965, "learning_rate": 5.41025641025641e-06, "loss": 1.3514, "step": 211 }, { "epoch": 0.016342892383595438, "grad_norm": 5.278835773468018, "learning_rate": 5.435897435897436e-06, "loss": 1.313, "step": 212 }, { "epoch": 0.016419981498612395, "grad_norm": 5.034635543823242, "learning_rate": 5.461538461538461e-06, "loss": 1.1963, "step": 213 }, { "epoch": 0.016497070613629355, "grad_norm": 4.991930961608887, "learning_rate": 5.487179487179488e-06, "loss": 1.3247, "step": 214 }, { "epoch": 0.016574159728646315, "grad_norm": 5.150703430175781, "learning_rate": 5.512820512820514e-06, "loss": 1.2084, "step": 215 }, { "epoch": 0.016651248843663275, "grad_norm": 4.838485240936279, "learning_rate": 5.538461538461539e-06, "loss": 1.248, "step": 216 }, { "epoch": 0.016728337958680235, "grad_norm": 4.620452404022217, "learning_rate": 5.564102564102565e-06, "loss": 1.1368, "step": 217 }, { "epoch": 0.016805427073697195, "grad_norm": 4.712934494018555, "learning_rate": 5.58974358974359e-06, "loss": 1.2305, "step": 218 }, { "epoch": 0.016882516188714152, "grad_norm": 4.66661262512207, "learning_rate": 5.615384615384616e-06, "loss": 1.1054, "step": 219 }, { "epoch": 0.016959605303731112, "grad_norm": 5.062489986419678, "learning_rate": 5.641025641025641e-06, "loss": 1.2248, "step": 220 }, { "epoch": 0.017036694418748072, "grad_norm": 5.344682216644287, "learning_rate": 5.666666666666667e-06, "loss": 1.3622, "step": 221 }, { "epoch": 0.017113783533765033, "grad_norm": 5.120663166046143, "learning_rate": 5.692307692307692e-06, "loss": 1.1208, "step": 222 }, { "epoch": 0.017190872648781993, "grad_norm": 4.629307746887207, "learning_rate": 5.717948717948718e-06, "loss": 1.204, "step": 223 }, { "epoch": 0.017267961763798953, "grad_norm": 5.405148029327393, "learning_rate": 5.743589743589743e-06, "loss": 1.2831, "step": 224 }, { "epoch": 0.01734505087881591, "grad_norm": 4.422632217407227, "learning_rate": 5.769230769230769e-06, "loss": 1.2251, "step": 225 }, { "epoch": 0.01742213999383287, "grad_norm": 4.856597900390625, "learning_rate": 5.794871794871796e-06, "loss": 1.3051, "step": 226 }, { "epoch": 0.01749922910884983, "grad_norm": 4.129710674285889, "learning_rate": 5.820512820512822e-06, "loss": 1.0233, "step": 227 }, { "epoch": 0.01757631822386679, "grad_norm": 4.654583930969238, "learning_rate": 5.846153846153847e-06, "loss": 1.2445, "step": 228 }, { "epoch": 0.01765340733888375, "grad_norm": 4.510683536529541, "learning_rate": 5.871794871794873e-06, "loss": 1.1158, "step": 229 }, { "epoch": 0.01773049645390071, "grad_norm": 4.56011438369751, "learning_rate": 5.897435897435898e-06, "loss": 1.2008, "step": 230 }, { "epoch": 0.017807585568917667, "grad_norm": 4.707639694213867, "learning_rate": 5.923076923076924e-06, "loss": 1.2206, "step": 231 }, { "epoch": 0.017884674683934627, "grad_norm": 4.402471542358398, "learning_rate": 5.948717948717949e-06, "loss": 1.193, "step": 232 }, { "epoch": 0.017961763798951588, "grad_norm": 4.763029098510742, "learning_rate": 5.974358974358975e-06, "loss": 1.2553, "step": 233 }, { "epoch": 0.018038852913968548, "grad_norm": 4.92691707611084, "learning_rate": 6e-06, "loss": 
1.2258, "step": 234 }, { "epoch": 0.018115942028985508, "grad_norm": 4.550500392913818, "learning_rate": 6.025641025641026e-06, "loss": 1.2513, "step": 235 }, { "epoch": 0.018193031144002468, "grad_norm": 4.7478251457214355, "learning_rate": 6.051282051282051e-06, "loss": 1.239, "step": 236 }, { "epoch": 0.018270120259019425, "grad_norm": 4.82733678817749, "learning_rate": 6.076923076923077e-06, "loss": 1.2731, "step": 237 }, { "epoch": 0.018347209374036385, "grad_norm": 4.546608924865723, "learning_rate": 6.102564102564104e-06, "loss": 1.1815, "step": 238 }, { "epoch": 0.018424298489053345, "grad_norm": 5.261005878448486, "learning_rate": 6.128205128205129e-06, "loss": 1.2074, "step": 239 }, { "epoch": 0.018501387604070305, "grad_norm": 4.9964213371276855, "learning_rate": 6.153846153846155e-06, "loss": 1.3077, "step": 240 }, { "epoch": 0.018578476719087265, "grad_norm": 4.793504238128662, "learning_rate": 6.17948717948718e-06, "loss": 1.1757, "step": 241 }, { "epoch": 0.018655565834104226, "grad_norm": 5.3314642906188965, "learning_rate": 6.205128205128206e-06, "loss": 1.3881, "step": 242 }, { "epoch": 0.018732654949121186, "grad_norm": 4.648246765136719, "learning_rate": 6.230769230769231e-06, "loss": 1.2445, "step": 243 }, { "epoch": 0.018809744064138142, "grad_norm": 4.99802827835083, "learning_rate": 6.256410256410257e-06, "loss": 1.27, "step": 244 }, { "epoch": 0.018886833179155103, "grad_norm": 4.553841590881348, "learning_rate": 6.282051282051282e-06, "loss": 1.1916, "step": 245 }, { "epoch": 0.018963922294172063, "grad_norm": 4.673896312713623, "learning_rate": 6.307692307692308e-06, "loss": 1.2046, "step": 246 }, { "epoch": 0.019041011409189023, "grad_norm": 5.352630615234375, "learning_rate": 6.333333333333333e-06, "loss": 1.3785, "step": 247 }, { "epoch": 0.019118100524205983, "grad_norm": 4.59555721282959, "learning_rate": 6.358974358974359e-06, "loss": 1.0682, "step": 248 }, { "epoch": 0.019195189639222943, "grad_norm": 5.211584568023682, "learning_rate": 6.384615384615384e-06, "loss": 1.3154, "step": 249 }, { "epoch": 0.0192722787542399, "grad_norm": 4.7507243156433105, "learning_rate": 6.410256410256412e-06, "loss": 1.1679, "step": 250 }, { "epoch": 0.01934936786925686, "grad_norm": 4.9127516746521, "learning_rate": 6.435897435897437e-06, "loss": 1.1803, "step": 251 }, { "epoch": 0.01942645698427382, "grad_norm": 5.001187801361084, "learning_rate": 6.461538461538463e-06, "loss": 1.2204, "step": 252 }, { "epoch": 0.01950354609929078, "grad_norm": 5.021228313446045, "learning_rate": 6.487179487179488e-06, "loss": 1.2126, "step": 253 }, { "epoch": 0.01958063521430774, "grad_norm": 4.73190975189209, "learning_rate": 6.512820512820514e-06, "loss": 1.1656, "step": 254 }, { "epoch": 0.0196577243293247, "grad_norm": 4.766290187835693, "learning_rate": 6.538461538461539e-06, "loss": 1.2573, "step": 255 }, { "epoch": 0.019734813444341658, "grad_norm": 4.61043643951416, "learning_rate": 6.564102564102565e-06, "loss": 1.2092, "step": 256 }, { "epoch": 0.019811902559358618, "grad_norm": 4.742149353027344, "learning_rate": 6.58974358974359e-06, "loss": 1.2197, "step": 257 }, { "epoch": 0.019888991674375578, "grad_norm": 4.5568389892578125, "learning_rate": 6.615384615384616e-06, "loss": 1.2677, "step": 258 }, { "epoch": 0.019966080789392538, "grad_norm": 4.9256367683410645, "learning_rate": 6.641025641025641e-06, "loss": 1.1879, "step": 259 }, { "epoch": 0.020043169904409498, "grad_norm": 5.019280433654785, "learning_rate": 6.666666666666667e-06, "loss": 1.1391, "step": 260 }, { 
"epoch": 0.02012025901942646, "grad_norm": 5.337982654571533, "learning_rate": 6.692307692307692e-06, "loss": 1.1885, "step": 261 }, { "epoch": 0.020197348134443415, "grad_norm": 5.246166229248047, "learning_rate": 6.717948717948718e-06, "loss": 1.2816, "step": 262 }, { "epoch": 0.020274437249460375, "grad_norm": 5.1329731941223145, "learning_rate": 6.743589743589745e-06, "loss": 1.0985, "step": 263 }, { "epoch": 0.020351526364477335, "grad_norm": 5.237858772277832, "learning_rate": 6.76923076923077e-06, "loss": 1.3882, "step": 264 }, { "epoch": 0.020428615479494296, "grad_norm": 5.131351470947266, "learning_rate": 6.794871794871796e-06, "loss": 1.1944, "step": 265 }, { "epoch": 0.020505704594511256, "grad_norm": 5.077263832092285, "learning_rate": 6.820512820512821e-06, "loss": 1.2041, "step": 266 }, { "epoch": 0.020582793709528216, "grad_norm": 5.028573036193848, "learning_rate": 6.846153846153847e-06, "loss": 1.248, "step": 267 }, { "epoch": 0.020659882824545173, "grad_norm": 5.382236480712891, "learning_rate": 6.871794871794872e-06, "loss": 1.3058, "step": 268 }, { "epoch": 0.020736971939562133, "grad_norm": 4.82020902633667, "learning_rate": 6.897435897435898e-06, "loss": 1.2581, "step": 269 }, { "epoch": 0.020814061054579093, "grad_norm": 4.485026836395264, "learning_rate": 6.923076923076923e-06, "loss": 1.1932, "step": 270 }, { "epoch": 0.020891150169596053, "grad_norm": 4.891682147979736, "learning_rate": 6.948717948717949e-06, "loss": 1.2035, "step": 271 }, { "epoch": 0.020968239284613013, "grad_norm": 4.9292683601379395, "learning_rate": 6.974358974358974e-06, "loss": 1.2209, "step": 272 }, { "epoch": 0.021045328399629974, "grad_norm": 4.799893379211426, "learning_rate": 7e-06, "loss": 1.3824, "step": 273 }, { "epoch": 0.02112241751464693, "grad_norm": 4.358574390411377, "learning_rate": 7.025641025641025e-06, "loss": 1.1656, "step": 274 }, { "epoch": 0.02119950662966389, "grad_norm": 4.752989768981934, "learning_rate": 7.051282051282053e-06, "loss": 1.1773, "step": 275 }, { "epoch": 0.02127659574468085, "grad_norm": 4.714949607849121, "learning_rate": 7.076923076923078e-06, "loss": 1.1622, "step": 276 }, { "epoch": 0.02135368485969781, "grad_norm": 4.488297939300537, "learning_rate": 7.102564102564104e-06, "loss": 1.1412, "step": 277 }, { "epoch": 0.02143077397471477, "grad_norm": 4.944022178649902, "learning_rate": 7.128205128205129e-06, "loss": 1.1468, "step": 278 }, { "epoch": 0.02150786308973173, "grad_norm": 4.799287796020508, "learning_rate": 7.153846153846155e-06, "loss": 1.1958, "step": 279 }, { "epoch": 0.021584952204748688, "grad_norm": 4.447544574737549, "learning_rate": 7.17948717948718e-06, "loss": 1.3105, "step": 280 }, { "epoch": 0.021662041319765648, "grad_norm": 4.781645774841309, "learning_rate": 7.205128205128206e-06, "loss": 1.3335, "step": 281 }, { "epoch": 0.021739130434782608, "grad_norm": 5.0389018058776855, "learning_rate": 7.230769230769231e-06, "loss": 1.2225, "step": 282 }, { "epoch": 0.021816219549799568, "grad_norm": 5.132277011871338, "learning_rate": 7.256410256410257e-06, "loss": 1.2914, "step": 283 }, { "epoch": 0.02189330866481653, "grad_norm": 5.015186309814453, "learning_rate": 7.282051282051282e-06, "loss": 1.1996, "step": 284 }, { "epoch": 0.02197039777983349, "grad_norm": 5.795614242553711, "learning_rate": 7.307692307692308e-06, "loss": 1.251, "step": 285 }, { "epoch": 0.02204748689485045, "grad_norm": 4.816180229187012, "learning_rate": 7.333333333333333e-06, "loss": 1.2001, "step": 286 }, { "epoch": 0.022124576009867405, "grad_norm": 
4.811607837677002, "learning_rate": 7.35897435897436e-06, "loss": 1.2148, "step": 287 }, { "epoch": 0.022201665124884366, "grad_norm": 4.706379413604736, "learning_rate": 7.384615384615386e-06, "loss": 1.249, "step": 288 }, { "epoch": 0.022278754239901326, "grad_norm": 5.7372894287109375, "learning_rate": 7.410256410256411e-06, "loss": 1.3184, "step": 289 }, { "epoch": 0.022355843354918286, "grad_norm": 4.672527313232422, "learning_rate": 7.435897435897437e-06, "loss": 1.1606, "step": 290 }, { "epoch": 0.022432932469935246, "grad_norm": 4.6642560958862305, "learning_rate": 7.461538461538462e-06, "loss": 1.2362, "step": 291 }, { "epoch": 0.022510021584952206, "grad_norm": 4.640800952911377, "learning_rate": 7.487179487179488e-06, "loss": 1.237, "step": 292 }, { "epoch": 0.022587110699969163, "grad_norm": 5.178220272064209, "learning_rate": 7.512820512820513e-06, "loss": 1.1799, "step": 293 }, { "epoch": 0.022664199814986123, "grad_norm": 4.580568790435791, "learning_rate": 7.538461538461539e-06, "loss": 1.2442, "step": 294 }, { "epoch": 0.022741288930003083, "grad_norm": 4.631171226501465, "learning_rate": 7.564102564102564e-06, "loss": 1.2399, "step": 295 }, { "epoch": 0.022818378045020044, "grad_norm": 4.3523101806640625, "learning_rate": 7.58974358974359e-06, "loss": 1.1657, "step": 296 }, { "epoch": 0.022895467160037004, "grad_norm": 5.684840202331543, "learning_rate": 7.615384615384615e-06, "loss": 1.2356, "step": 297 }, { "epoch": 0.022972556275053964, "grad_norm": 4.739688873291016, "learning_rate": 7.641025641025641e-06, "loss": 1.2422, "step": 298 }, { "epoch": 0.02304964539007092, "grad_norm": 4.905622482299805, "learning_rate": 7.666666666666667e-06, "loss": 1.3017, "step": 299 }, { "epoch": 0.02312673450508788, "grad_norm": 4.725374221801758, "learning_rate": 7.692307692307694e-06, "loss": 1.3432, "step": 300 }, { "epoch": 0.02320382362010484, "grad_norm": 4.97802209854126, "learning_rate": 7.717948717948718e-06, "loss": 1.2064, "step": 301 }, { "epoch": 0.0232809127351218, "grad_norm": 4.46768856048584, "learning_rate": 7.743589743589745e-06, "loss": 1.1462, "step": 302 }, { "epoch": 0.02335800185013876, "grad_norm": 4.416783809661865, "learning_rate": 7.76923076923077e-06, "loss": 1.2325, "step": 303 }, { "epoch": 0.02343509096515572, "grad_norm": 5.031586647033691, "learning_rate": 7.794871794871796e-06, "loss": 1.2013, "step": 304 }, { "epoch": 0.023512180080172678, "grad_norm": 5.045072555541992, "learning_rate": 7.820512820512822e-06, "loss": 1.1312, "step": 305 }, { "epoch": 0.02358926919518964, "grad_norm": 4.986116886138916, "learning_rate": 7.846153846153847e-06, "loss": 1.1599, "step": 306 }, { "epoch": 0.0236663583102066, "grad_norm": 5.043438911437988, "learning_rate": 7.871794871794873e-06, "loss": 1.2043, "step": 307 }, { "epoch": 0.02374344742522356, "grad_norm": 4.92976188659668, "learning_rate": 7.897435897435898e-06, "loss": 1.2303, "step": 308 }, { "epoch": 0.02382053654024052, "grad_norm": 5.960122585296631, "learning_rate": 7.923076923076924e-06, "loss": 1.203, "step": 309 }, { "epoch": 0.02389762565525748, "grad_norm": 5.050799369812012, "learning_rate": 7.948717948717949e-06, "loss": 1.3918, "step": 310 }, { "epoch": 0.023974714770274436, "grad_norm": 5.031795978546143, "learning_rate": 7.974358974358975e-06, "loss": 1.2533, "step": 311 }, { "epoch": 0.024051803885291396, "grad_norm": 4.596309185028076, "learning_rate": 8.000000000000001e-06, "loss": 1.1893, "step": 312 }, { "epoch": 0.024128893000308356, "grad_norm": 4.877758979797363, "learning_rate": 
8.025641025641026e-06, "loss": 1.2398, "step": 313 }, { "epoch": 0.024205982115325316, "grad_norm": 4.588964462280273, "learning_rate": 8.051282051282052e-06, "loss": 1.1656, "step": 314 }, { "epoch": 0.024283071230342276, "grad_norm": 4.996614456176758, "learning_rate": 8.076923076923077e-06, "loss": 1.2567, "step": 315 }, { "epoch": 0.024360160345359236, "grad_norm": 4.830722332000732, "learning_rate": 8.102564102564103e-06, "loss": 1.2854, "step": 316 }, { "epoch": 0.024437249460376193, "grad_norm": 5.052509784698486, "learning_rate": 8.12820512820513e-06, "loss": 1.1399, "step": 317 }, { "epoch": 0.024514338575393153, "grad_norm": 5.15505838394165, "learning_rate": 8.153846153846154e-06, "loss": 1.1843, "step": 318 }, { "epoch": 0.024591427690410114, "grad_norm": 4.3718671798706055, "learning_rate": 8.17948717948718e-06, "loss": 1.2297, "step": 319 }, { "epoch": 0.024668516805427074, "grad_norm": 5.402135372161865, "learning_rate": 8.205128205128205e-06, "loss": 1.1474, "step": 320 }, { "epoch": 0.024745605920444034, "grad_norm": 4.940793991088867, "learning_rate": 8.230769230769232e-06, "loss": 1.2823, "step": 321 }, { "epoch": 0.024822695035460994, "grad_norm": 4.902515888214111, "learning_rate": 8.256410256410256e-06, "loss": 1.3237, "step": 322 }, { "epoch": 0.024899784150477954, "grad_norm": 4.852400779724121, "learning_rate": 8.282051282051283e-06, "loss": 1.1324, "step": 323 }, { "epoch": 0.02497687326549491, "grad_norm": 4.758856296539307, "learning_rate": 8.307692307692309e-06, "loss": 1.2108, "step": 324 }, { "epoch": 0.02505396238051187, "grad_norm": 4.814808368682861, "learning_rate": 8.333333333333334e-06, "loss": 1.2657, "step": 325 }, { "epoch": 0.02513105149552883, "grad_norm": 4.526566028594971, "learning_rate": 8.35897435897436e-06, "loss": 1.2241, "step": 326 }, { "epoch": 0.02520814061054579, "grad_norm": 5.531708717346191, "learning_rate": 8.384615384615385e-06, "loss": 1.3275, "step": 327 }, { "epoch": 0.02528522972556275, "grad_norm": 4.945657253265381, "learning_rate": 8.410256410256411e-06, "loss": 1.274, "step": 328 }, { "epoch": 0.025362318840579712, "grad_norm": 4.540581703186035, "learning_rate": 8.435897435897436e-06, "loss": 1.266, "step": 329 }, { "epoch": 0.02543940795559667, "grad_norm": 4.201421737670898, "learning_rate": 8.461538461538462e-06, "loss": 1.2355, "step": 330 }, { "epoch": 0.02551649707061363, "grad_norm": 4.603540420532227, "learning_rate": 8.487179487179488e-06, "loss": 1.2529, "step": 331 }, { "epoch": 0.02559358618563059, "grad_norm": 4.829327583312988, "learning_rate": 8.512820512820513e-06, "loss": 1.3069, "step": 332 }, { "epoch": 0.02567067530064755, "grad_norm": 4.978155612945557, "learning_rate": 8.53846153846154e-06, "loss": 1.1573, "step": 333 }, { "epoch": 0.02574776441566451, "grad_norm": 4.506424427032471, "learning_rate": 8.564102564102564e-06, "loss": 1.1211, "step": 334 }, { "epoch": 0.02582485353068147, "grad_norm": 5.001166343688965, "learning_rate": 8.58974358974359e-06, "loss": 1.1474, "step": 335 }, { "epoch": 0.025901942645698426, "grad_norm": 4.721814155578613, "learning_rate": 8.615384615384617e-06, "loss": 1.3049, "step": 336 }, { "epoch": 0.025979031760715386, "grad_norm": 4.9357008934021, "learning_rate": 8.641025641025641e-06, "loss": 1.2877, "step": 337 }, { "epoch": 0.026056120875732346, "grad_norm": 4.828952789306641, "learning_rate": 8.666666666666668e-06, "loss": 1.1093, "step": 338 }, { "epoch": 0.026133209990749307, "grad_norm": 4.813793659210205, "learning_rate": 8.692307692307692e-06, "loss": 1.2452, 
"step": 339 }, { "epoch": 0.026210299105766267, "grad_norm": 4.303406715393066, "learning_rate": 8.717948717948719e-06, "loss": 1.0951, "step": 340 }, { "epoch": 0.026287388220783227, "grad_norm": 4.791975498199463, "learning_rate": 8.743589743589743e-06, "loss": 1.2548, "step": 341 }, { "epoch": 0.026364477335800184, "grad_norm": 4.471169948577881, "learning_rate": 8.76923076923077e-06, "loss": 1.112, "step": 342 }, { "epoch": 0.026441566450817144, "grad_norm": 4.681336879730225, "learning_rate": 8.794871794871796e-06, "loss": 1.2183, "step": 343 }, { "epoch": 0.026518655565834104, "grad_norm": 5.055361270904541, "learning_rate": 8.820512820512821e-06, "loss": 1.1957, "step": 344 }, { "epoch": 0.026595744680851064, "grad_norm": 4.525293827056885, "learning_rate": 8.846153846153847e-06, "loss": 1.1316, "step": 345 }, { "epoch": 0.026672833795868024, "grad_norm": 4.73473596572876, "learning_rate": 8.871794871794872e-06, "loss": 1.2229, "step": 346 }, { "epoch": 0.026749922910884984, "grad_norm": 4.338306427001953, "learning_rate": 8.897435897435898e-06, "loss": 1.2271, "step": 347 }, { "epoch": 0.02682701202590194, "grad_norm": 4.502996444702148, "learning_rate": 8.923076923076925e-06, "loss": 1.2868, "step": 348 }, { "epoch": 0.0269041011409189, "grad_norm": 4.5346198081970215, "learning_rate": 8.94871794871795e-06, "loss": 1.0935, "step": 349 }, { "epoch": 0.02698119025593586, "grad_norm": 4.440368175506592, "learning_rate": 8.974358974358976e-06, "loss": 1.1267, "step": 350 }, { "epoch": 0.02705827937095282, "grad_norm": 4.624384880065918, "learning_rate": 9e-06, "loss": 1.215, "step": 351 }, { "epoch": 0.027135368485969782, "grad_norm": 4.52625846862793, "learning_rate": 9.025641025641027e-06, "loss": 1.2398, "step": 352 }, { "epoch": 0.027212457600986742, "grad_norm": 4.4209818840026855, "learning_rate": 9.051282051282051e-06, "loss": 1.1526, "step": 353 }, { "epoch": 0.0272895467160037, "grad_norm": 4.647578239440918, "learning_rate": 9.076923076923078e-06, "loss": 1.2131, "step": 354 }, { "epoch": 0.02736663583102066, "grad_norm": 3.973903179168701, "learning_rate": 9.102564102564104e-06, "loss": 1.0613, "step": 355 }, { "epoch": 0.02744372494603762, "grad_norm": 5.1517252922058105, "learning_rate": 9.128205128205129e-06, "loss": 1.1746, "step": 356 }, { "epoch": 0.02752081406105458, "grad_norm": 4.37183952331543, "learning_rate": 9.153846153846155e-06, "loss": 1.2774, "step": 357 }, { "epoch": 0.02759790317607154, "grad_norm": 4.5695295333862305, "learning_rate": 9.17948717948718e-06, "loss": 1.1766, "step": 358 }, { "epoch": 0.0276749922910885, "grad_norm": 4.399728775024414, "learning_rate": 9.205128205128206e-06, "loss": 1.2096, "step": 359 }, { "epoch": 0.027752081406105456, "grad_norm": 4.5362043380737305, "learning_rate": 9.230769230769232e-06, "loss": 1.1005, "step": 360 }, { "epoch": 0.027829170521122416, "grad_norm": 4.581974983215332, "learning_rate": 9.256410256410257e-06, "loss": 1.1755, "step": 361 }, { "epoch": 0.027906259636139377, "grad_norm": 5.120265960693359, "learning_rate": 9.282051282051283e-06, "loss": 1.1285, "step": 362 }, { "epoch": 0.027983348751156337, "grad_norm": 4.422695636749268, "learning_rate": 9.307692307692308e-06, "loss": 1.2343, "step": 363 }, { "epoch": 0.028060437866173297, "grad_norm": 4.639062881469727, "learning_rate": 9.333333333333334e-06, "loss": 1.2733, "step": 364 }, { "epoch": 0.028137526981190257, "grad_norm": 5.054222106933594, "learning_rate": 9.358974358974359e-06, "loss": 1.1177, "step": 365 }, { "epoch": 0.028214616096207217, 
"grad_norm": 5.064762592315674, "learning_rate": 9.384615384615385e-06, "loss": 1.1147, "step": 366 }, { "epoch": 0.028291705211224174, "grad_norm": 4.730871200561523, "learning_rate": 9.410256410256412e-06, "loss": 1.3437, "step": 367 }, { "epoch": 0.028368794326241134, "grad_norm": 4.744129180908203, "learning_rate": 9.435897435897436e-06, "loss": 1.3698, "step": 368 }, { "epoch": 0.028445883441258094, "grad_norm": 4.724253177642822, "learning_rate": 9.461538461538463e-06, "loss": 1.1685, "step": 369 }, { "epoch": 0.028522972556275054, "grad_norm": 4.356321334838867, "learning_rate": 9.487179487179487e-06, "loss": 1.154, "step": 370 }, { "epoch": 0.028600061671292015, "grad_norm": 4.506900310516357, "learning_rate": 9.512820512820514e-06, "loss": 1.233, "step": 371 }, { "epoch": 0.028677150786308975, "grad_norm": 4.505119800567627, "learning_rate": 9.53846153846154e-06, "loss": 1.3049, "step": 372 }, { "epoch": 0.02875423990132593, "grad_norm": 4.503683090209961, "learning_rate": 9.564102564102565e-06, "loss": 1.1666, "step": 373 }, { "epoch": 0.02883132901634289, "grad_norm": 4.283431529998779, "learning_rate": 9.589743589743591e-06, "loss": 1.1497, "step": 374 }, { "epoch": 0.028908418131359852, "grad_norm": 5.00119686126709, "learning_rate": 9.615384615384616e-06, "loss": 1.2709, "step": 375 }, { "epoch": 0.028985507246376812, "grad_norm": 4.850398540496826, "learning_rate": 9.641025641025642e-06, "loss": 1.2135, "step": 376 }, { "epoch": 0.029062596361393772, "grad_norm": 5.15523624420166, "learning_rate": 9.666666666666667e-06, "loss": 1.1788, "step": 377 }, { "epoch": 0.029139685476410732, "grad_norm": 4.818391799926758, "learning_rate": 9.692307692307693e-06, "loss": 1.1626, "step": 378 }, { "epoch": 0.02921677459142769, "grad_norm": 5.061548233032227, "learning_rate": 9.71794871794872e-06, "loss": 1.2423, "step": 379 }, { "epoch": 0.02929386370644465, "grad_norm": 4.615644931793213, "learning_rate": 9.743589743589744e-06, "loss": 1.1704, "step": 380 }, { "epoch": 0.02937095282146161, "grad_norm": 4.75278902053833, "learning_rate": 9.76923076923077e-06, "loss": 1.1854, "step": 381 }, { "epoch": 0.02944804193647857, "grad_norm": 4.710444450378418, "learning_rate": 9.794871794871795e-06, "loss": 1.2158, "step": 382 }, { "epoch": 0.02952513105149553, "grad_norm": 4.5607709884643555, "learning_rate": 9.820512820512821e-06, "loss": 1.1627, "step": 383 }, { "epoch": 0.02960222016651249, "grad_norm": 4.754578113555908, "learning_rate": 9.846153846153848e-06, "loss": 1.3299, "step": 384 }, { "epoch": 0.029679309281529447, "grad_norm": 4.9476542472839355, "learning_rate": 9.871794871794872e-06, "loss": 1.1626, "step": 385 }, { "epoch": 0.029756398396546407, "grad_norm": 4.63112211227417, "learning_rate": 9.897435897435899e-06, "loss": 1.2714, "step": 386 }, { "epoch": 0.029833487511563367, "grad_norm": 4.136993885040283, "learning_rate": 9.923076923076923e-06, "loss": 1.1761, "step": 387 }, { "epoch": 0.029910576626580327, "grad_norm": 4.531764984130859, "learning_rate": 9.94871794871795e-06, "loss": 1.1452, "step": 388 }, { "epoch": 0.029987665741597287, "grad_norm": 5.1062774658203125, "learning_rate": 9.974358974358974e-06, "loss": 1.2642, "step": 389 }, { "epoch": 0.030064754856614247, "grad_norm": 4.920534610748291, "learning_rate": 1e-05, "loss": 1.2648, "step": 390 }, { "epoch": 0.030141843971631204, "grad_norm": 5.01254415512085, "learning_rate": 9.99999984413795e-06, "loss": 1.1768, "step": 391 }, { "epoch": 0.030218933086648164, "grad_norm": 4.516940116882324, "learning_rate": 
9.999999376551802e-06, "loss": 1.303, "step": 392 }, { "epoch": 0.030296022201665124, "grad_norm": 4.425420761108398, "learning_rate": 9.99999859724159e-06, "loss": 1.3071, "step": 393 }, { "epoch": 0.030373111316682085, "grad_norm": 5.187442302703857, "learning_rate": 9.999997506207361e-06, "loss": 1.3249, "step": 394 }, { "epoch": 0.030450200431699045, "grad_norm": 4.589512348175049, "learning_rate": 9.999996103449184e-06, "loss": 1.2048, "step": 395 }, { "epoch": 0.030527289546716005, "grad_norm": 4.254446983337402, "learning_rate": 9.999994388967143e-06, "loss": 1.2693, "step": 396 }, { "epoch": 0.03060437866173296, "grad_norm": 4.697052478790283, "learning_rate": 9.999992362761349e-06, "loss": 1.2305, "step": 397 }, { "epoch": 0.030681467776749922, "grad_norm": 4.343505382537842, "learning_rate": 9.999990024831926e-06, "loss": 1.2191, "step": 398 }, { "epoch": 0.030758556891766882, "grad_norm": 4.3586015701293945, "learning_rate": 9.999987375179023e-06, "loss": 1.197, "step": 399 }, { "epoch": 0.030835646006783842, "grad_norm": 4.417754173278809, "learning_rate": 9.999984413802802e-06, "loss": 1.2121, "step": 400 }, { "epoch": 0.030912735121800802, "grad_norm": 4.340965747833252, "learning_rate": 9.999981140703447e-06, "loss": 1.1745, "step": 401 }, { "epoch": 0.030989824236817762, "grad_norm": 4.833497047424316, "learning_rate": 9.999977555881163e-06, "loss": 1.2646, "step": 402 }, { "epoch": 0.031066913351834723, "grad_norm": 4.495340824127197, "learning_rate": 9.999973659336176e-06, "loss": 1.1996, "step": 403 }, { "epoch": 0.03114400246685168, "grad_norm": 4.7980475425720215, "learning_rate": 9.999969451068725e-06, "loss": 1.2813, "step": 404 }, { "epoch": 0.03122109158186864, "grad_norm": 4.268666744232178, "learning_rate": 9.999964931079076e-06, "loss": 1.2938, "step": 405 }, { "epoch": 0.0312981806968856, "grad_norm": 4.3926777839660645, "learning_rate": 9.999960099367507e-06, "loss": 1.236, "step": 406 }, { "epoch": 0.031375269811902556, "grad_norm": 4.350759029388428, "learning_rate": 9.999954955934323e-06, "loss": 1.1252, "step": 407 }, { "epoch": 0.03145235892691952, "grad_norm": 4.4295220375061035, "learning_rate": 9.999949500779842e-06, "loss": 1.2229, "step": 408 }, { "epoch": 0.03152944804193648, "grad_norm": 4.337591171264648, "learning_rate": 9.999943733904404e-06, "loss": 1.1971, "step": 409 }, { "epoch": 0.03160653715695344, "grad_norm": 4.42830753326416, "learning_rate": 9.999937655308373e-06, "loss": 1.1197, "step": 410 }, { "epoch": 0.0316836262719704, "grad_norm": 5.042124271392822, "learning_rate": 9.999931264992122e-06, "loss": 1.1953, "step": 411 }, { "epoch": 0.03176071538698736, "grad_norm": 4.555182933807373, "learning_rate": 9.999924562956052e-06, "loss": 1.2661, "step": 412 }, { "epoch": 0.03183780450200432, "grad_norm": 4.939544677734375, "learning_rate": 9.999917549200581e-06, "loss": 1.2496, "step": 413 }, { "epoch": 0.031914893617021274, "grad_norm": 4.4207539558410645, "learning_rate": 9.999910223726146e-06, "loss": 1.2442, "step": 414 }, { "epoch": 0.03199198273203824, "grad_norm": 4.292929649353027, "learning_rate": 9.999902586533205e-06, "loss": 1.1871, "step": 415 }, { "epoch": 0.032069071847055194, "grad_norm": 4.947376251220703, "learning_rate": 9.999894637622231e-06, "loss": 1.1849, "step": 416 }, { "epoch": 0.03214616096207216, "grad_norm": 4.807240009307861, "learning_rate": 9.999886376993723e-06, "loss": 1.3515, "step": 417 }, { "epoch": 0.032223250077089115, "grad_norm": 4.25323486328125, "learning_rate": 9.999877804648194e-06, "loss": 
1.1788, "step": 418 }, { "epoch": 0.03230033919210607, "grad_norm": 4.476768970489502, "learning_rate": 9.999868920586178e-06, "loss": 1.1857, "step": 419 }, { "epoch": 0.032377428307123035, "grad_norm": 4.515340805053711, "learning_rate": 9.999859724808231e-06, "loss": 1.1074, "step": 420 }, { "epoch": 0.03245451742213999, "grad_norm": 4.648842811584473, "learning_rate": 9.999850217314924e-06, "loss": 1.2364, "step": 421 }, { "epoch": 0.032531606537156955, "grad_norm": 4.573676109313965, "learning_rate": 9.999840398106852e-06, "loss": 1.3302, "step": 422 }, { "epoch": 0.03260869565217391, "grad_norm": 4.733384132385254, "learning_rate": 9.999830267184625e-06, "loss": 1.2797, "step": 423 }, { "epoch": 0.032685784767190876, "grad_norm": 4.6709303855896, "learning_rate": 9.999819824548875e-06, "loss": 1.2243, "step": 424 }, { "epoch": 0.03276287388220783, "grad_norm": 4.113539695739746, "learning_rate": 9.999809070200256e-06, "loss": 1.0434, "step": 425 }, { "epoch": 0.03283996299722479, "grad_norm": 4.282526969909668, "learning_rate": 9.999798004139435e-06, "loss": 1.2439, "step": 426 }, { "epoch": 0.03291705211224175, "grad_norm": 4.742609977722168, "learning_rate": 9.999786626367102e-06, "loss": 1.3161, "step": 427 }, { "epoch": 0.03299414122725871, "grad_norm": 4.848706245422363, "learning_rate": 9.999774936883969e-06, "loss": 1.2556, "step": 428 }, { "epoch": 0.03307123034227567, "grad_norm": 5.125500202178955, "learning_rate": 9.999762935690762e-06, "loss": 1.2509, "step": 429 }, { "epoch": 0.03314831945729263, "grad_norm": 4.103264808654785, "learning_rate": 9.999750622788232e-06, "loss": 1.3005, "step": 430 }, { "epoch": 0.03322540857230959, "grad_norm": 4.143542289733887, "learning_rate": 9.999737998177144e-06, "loss": 1.1695, "step": 431 }, { "epoch": 0.03330249768732655, "grad_norm": 4.100920677185059, "learning_rate": 9.999725061858286e-06, "loss": 1.23, "step": 432 }, { "epoch": 0.03337958680234351, "grad_norm": 4.6039299964904785, "learning_rate": 9.999711813832465e-06, "loss": 1.1966, "step": 433 }, { "epoch": 0.03345667591736047, "grad_norm": 5.09745979309082, "learning_rate": 9.999698254100506e-06, "loss": 1.197, "step": 434 }, { "epoch": 0.03353376503237743, "grad_norm": 4.1532769203186035, "learning_rate": 9.999684382663254e-06, "loss": 1.0883, "step": 435 }, { "epoch": 0.03361085414739439, "grad_norm": 4.5222697257995605, "learning_rate": 9.999670199521577e-06, "loss": 1.2341, "step": 436 }, { "epoch": 0.03368794326241135, "grad_norm": 4.2074294090271, "learning_rate": 9.999655704676357e-06, "loss": 1.3028, "step": 437 }, { "epoch": 0.033765032377428304, "grad_norm": 4.485882759094238, "learning_rate": 9.999640898128495e-06, "loss": 1.2003, "step": 438 }, { "epoch": 0.03384212149244527, "grad_norm": 4.506419658660889, "learning_rate": 9.999625779878918e-06, "loss": 1.2515, "step": 439 }, { "epoch": 0.033919210607462225, "grad_norm": 4.3678436279296875, "learning_rate": 9.99961034992857e-06, "loss": 1.2829, "step": 440 }, { "epoch": 0.03399629972247919, "grad_norm": 4.121408462524414, "learning_rate": 9.999594608278407e-06, "loss": 1.1691, "step": 441 }, { "epoch": 0.034073388837496145, "grad_norm": 4.504795074462891, "learning_rate": 9.999578554929415e-06, "loss": 1.1358, "step": 442 }, { "epoch": 0.03415047795251311, "grad_norm": 4.258967876434326, "learning_rate": 9.999562189882594e-06, "loss": 1.1833, "step": 443 }, { "epoch": 0.034227567067530065, "grad_norm": 4.424483776092529, "learning_rate": 9.999545513138964e-06, "loss": 1.2847, "step": 444 }, { "epoch": 
0.03430465618254702, "grad_norm": 4.2883687019348145, "learning_rate": 9.999528524699563e-06, "loss": 1.1569, "step": 445 }, { "epoch": 0.034381745297563986, "grad_norm": 4.397137641906738, "learning_rate": 9.999511224565453e-06, "loss": 1.1686, "step": 446 }, { "epoch": 0.03445883441258094, "grad_norm": 4.637729644775391, "learning_rate": 9.999493612737712e-06, "loss": 1.2365, "step": 447 }, { "epoch": 0.034535923527597906, "grad_norm": 4.562082290649414, "learning_rate": 9.999475689217434e-06, "loss": 1.1553, "step": 448 }, { "epoch": 0.03461301264261486, "grad_norm": 4.052862644195557, "learning_rate": 9.999457454005743e-06, "loss": 1.1823, "step": 449 }, { "epoch": 0.03469010175763182, "grad_norm": 4.691180229187012, "learning_rate": 9.999438907103772e-06, "loss": 1.2576, "step": 450 }, { "epoch": 0.03476719087264878, "grad_norm": 4.515274524688721, "learning_rate": 9.999420048512678e-06, "loss": 1.1499, "step": 451 }, { "epoch": 0.03484427998766574, "grad_norm": 4.499693393707275, "learning_rate": 9.999400878233636e-06, "loss": 1.1628, "step": 452 }, { "epoch": 0.0349213691026827, "grad_norm": 4.56060266494751, "learning_rate": 9.999381396267841e-06, "loss": 1.2407, "step": 453 }, { "epoch": 0.03499845821769966, "grad_norm": 4.329947471618652, "learning_rate": 9.99936160261651e-06, "loss": 1.1197, "step": 454 }, { "epoch": 0.035075547332716624, "grad_norm": 4.710033416748047, "learning_rate": 9.999341497280875e-06, "loss": 1.2235, "step": 455 }, { "epoch": 0.03515263644773358, "grad_norm": 4.5141167640686035, "learning_rate": 9.99932108026219e-06, "loss": 1.2179, "step": 456 }, { "epoch": 0.03522972556275054, "grad_norm": 4.853623390197754, "learning_rate": 9.999300351561727e-06, "loss": 1.2093, "step": 457 }, { "epoch": 0.0353068146777675, "grad_norm": 4.331170558929443, "learning_rate": 9.999279311180779e-06, "loss": 1.3841, "step": 458 }, { "epoch": 0.03538390379278446, "grad_norm": 4.377639293670654, "learning_rate": 9.999257959120658e-06, "loss": 1.2103, "step": 459 }, { "epoch": 0.03546099290780142, "grad_norm": 4.253114700317383, "learning_rate": 9.999236295382695e-06, "loss": 1.1429, "step": 460 }, { "epoch": 0.03553808202281838, "grad_norm": 4.613873481750488, "learning_rate": 9.999214319968242e-06, "loss": 1.2951, "step": 461 }, { "epoch": 0.035615171137835334, "grad_norm": 4.3931498527526855, "learning_rate": 9.999192032878667e-06, "loss": 1.223, "step": 462 }, { "epoch": 0.0356922602528523, "grad_norm": 4.192042827606201, "learning_rate": 9.99916943411536e-06, "loss": 1.1564, "step": 463 }, { "epoch": 0.035769349367869255, "grad_norm": 4.241518974304199, "learning_rate": 9.99914652367973e-06, "loss": 1.1852, "step": 464 }, { "epoch": 0.03584643848288622, "grad_norm": 4.183101177215576, "learning_rate": 9.999123301573208e-06, "loss": 1.2052, "step": 465 }, { "epoch": 0.035923527597903175, "grad_norm": 4.752673149108887, "learning_rate": 9.999099767797236e-06, "loss": 1.1693, "step": 466 }, { "epoch": 0.03600061671292014, "grad_norm": 4.796976089477539, "learning_rate": 9.999075922353286e-06, "loss": 1.3416, "step": 467 }, { "epoch": 0.036077705827937095, "grad_norm": 4.705587863922119, "learning_rate": 9.999051765242843e-06, "loss": 1.2008, "step": 468 }, { "epoch": 0.03615479494295405, "grad_norm": 4.491495609283447, "learning_rate": 9.999027296467412e-06, "loss": 1.1668, "step": 469 }, { "epoch": 0.036231884057971016, "grad_norm": 4.496662139892578, "learning_rate": 9.99900251602852e-06, "loss": 1.2627, "step": 470 }, { "epoch": 0.03630897317298797, "grad_norm": 
4.168951511383057, "learning_rate": 9.998977423927714e-06, "loss": 1.1625, "step": 471 }, { "epoch": 0.036386062288004936, "grad_norm": 4.014923095703125, "learning_rate": 9.998952020166554e-06, "loss": 1.1659, "step": 472 }, { "epoch": 0.03646315140302189, "grad_norm": 4.847691059112549, "learning_rate": 9.998926304746626e-06, "loss": 1.1743, "step": 473 }, { "epoch": 0.03654024051803885, "grad_norm": 4.13588809967041, "learning_rate": 9.998900277669531e-06, "loss": 1.1655, "step": 474 }, { "epoch": 0.03661732963305581, "grad_norm": 4.349898815155029, "learning_rate": 9.998873938936897e-06, "loss": 1.2163, "step": 475 }, { "epoch": 0.03669441874807277, "grad_norm": 4.191585540771484, "learning_rate": 9.998847288550363e-06, "loss": 1.1745, "step": 476 }, { "epoch": 0.036771507863089734, "grad_norm": 4.346571922302246, "learning_rate": 9.998820326511587e-06, "loss": 1.1258, "step": 477 }, { "epoch": 0.03684859697810669, "grad_norm": 4.2652459144592285, "learning_rate": 9.998793052822256e-06, "loss": 1.2362, "step": 478 }, { "epoch": 0.036925686093123654, "grad_norm": 4.184598445892334, "learning_rate": 9.998765467484066e-06, "loss": 1.1744, "step": 479 }, { "epoch": 0.03700277520814061, "grad_norm": 4.4486260414123535, "learning_rate": 9.998737570498737e-06, "loss": 1.1812, "step": 480 }, { "epoch": 0.03707986432315757, "grad_norm": 4.7114410400390625, "learning_rate": 9.998709361868012e-06, "loss": 1.2171, "step": 481 }, { "epoch": 0.03715695343817453, "grad_norm": 4.759378910064697, "learning_rate": 9.998680841593647e-06, "loss": 1.2212, "step": 482 }, { "epoch": 0.03723404255319149, "grad_norm": 4.3762922286987305, "learning_rate": 9.998652009677421e-06, "loss": 1.1983, "step": 483 }, { "epoch": 0.03731113166820845, "grad_norm": 4.322701454162598, "learning_rate": 9.998622866121129e-06, "loss": 1.2557, "step": 484 }, { "epoch": 0.03738822078322541, "grad_norm": 4.573087692260742, "learning_rate": 9.998593410926591e-06, "loss": 1.2969, "step": 485 }, { "epoch": 0.03746530989824237, "grad_norm": 4.229235649108887, "learning_rate": 9.998563644095642e-06, "loss": 1.2178, "step": 486 }, { "epoch": 0.03754239901325933, "grad_norm": 4.301479339599609, "learning_rate": 9.998533565630138e-06, "loss": 1.2294, "step": 487 }, { "epoch": 0.037619488128276285, "grad_norm": 4.079572677612305, "learning_rate": 9.998503175531955e-06, "loss": 1.2311, "step": 488 }, { "epoch": 0.03769657724329325, "grad_norm": 5.241011619567871, "learning_rate": 9.998472473802984e-06, "loss": 1.2433, "step": 489 }, { "epoch": 0.037773666358310205, "grad_norm": 4.18917989730835, "learning_rate": 9.998441460445145e-06, "loss": 1.3185, "step": 490 }, { "epoch": 0.03785075547332717, "grad_norm": 4.833195209503174, "learning_rate": 9.998410135460368e-06, "loss": 1.2041, "step": 491 }, { "epoch": 0.037927844588344126, "grad_norm": 4.499256610870361, "learning_rate": 9.998378498850605e-06, "loss": 1.2148, "step": 492 }, { "epoch": 0.03800493370336108, "grad_norm": 4.2141947746276855, "learning_rate": 9.998346550617833e-06, "loss": 1.1423, "step": 493 }, { "epoch": 0.038082022818378046, "grad_norm": 4.486852169036865, "learning_rate": 9.998314290764038e-06, "loss": 1.2306, "step": 494 }, { "epoch": 0.038159111933395, "grad_norm": 4.096106052398682, "learning_rate": 9.998281719291234e-06, "loss": 1.113, "step": 495 }, { "epoch": 0.038236201048411966, "grad_norm": 4.3826985359191895, "learning_rate": 9.998248836201452e-06, "loss": 1.1976, "step": 496 }, { "epoch": 0.03831329016342892, "grad_norm": 4.423654079437256, "learning_rate": 
9.998215641496743e-06, "loss": 1.1227, "step": 497 }, { "epoch": 0.03839037927844589, "grad_norm": 3.936204671859741, "learning_rate": 9.998182135179173e-06, "loss": 1.1393, "step": 498 }, { "epoch": 0.03846746839346284, "grad_norm": 4.782196521759033, "learning_rate": 9.998148317250835e-06, "loss": 1.1804, "step": 499 }, { "epoch": 0.0385445575084798, "grad_norm": 4.143787384033203, "learning_rate": 9.998114187713834e-06, "loss": 1.1194, "step": 500 }, { "epoch": 0.038621646623496764, "grad_norm": 4.448570728302002, "learning_rate": 9.998079746570299e-06, "loss": 1.2344, "step": 501 }, { "epoch": 0.03869873573851372, "grad_norm": 4.438679218292236, "learning_rate": 9.99804499382238e-06, "loss": 1.3456, "step": 502 }, { "epoch": 0.038775824853530684, "grad_norm": 4.5678606033325195, "learning_rate": 9.99800992947224e-06, "loss": 1.196, "step": 503 }, { "epoch": 0.03885291396854764, "grad_norm": 4.333302021026611, "learning_rate": 9.997974553522066e-06, "loss": 1.2677, "step": 504 }, { "epoch": 0.0389300030835646, "grad_norm": 4.986594200134277, "learning_rate": 9.997938865974063e-06, "loss": 1.2973, "step": 505 }, { "epoch": 0.03900709219858156, "grad_norm": 4.153099536895752, "learning_rate": 9.997902866830459e-06, "loss": 1.1358, "step": 506 }, { "epoch": 0.03908418131359852, "grad_norm": 4.446223735809326, "learning_rate": 9.997866556093491e-06, "loss": 1.2034, "step": 507 }, { "epoch": 0.03916127042861548, "grad_norm": 4.689149379730225, "learning_rate": 9.997829933765433e-06, "loss": 1.242, "step": 508 }, { "epoch": 0.03923835954363244, "grad_norm": 4.689703941345215, "learning_rate": 9.997792999848561e-06, "loss": 1.2383, "step": 509 }, { "epoch": 0.0393154486586494, "grad_norm": 4.7472429275512695, "learning_rate": 9.997755754345179e-06, "loss": 1.3211, "step": 510 }, { "epoch": 0.03939253777366636, "grad_norm": 4.735124111175537, "learning_rate": 9.997718197257612e-06, "loss": 1.2133, "step": 511 }, { "epoch": 0.039469626888683315, "grad_norm": 4.425952911376953, "learning_rate": 9.997680328588198e-06, "loss": 1.1441, "step": 512 }, { "epoch": 0.03954671600370028, "grad_norm": 4.210488319396973, "learning_rate": 9.9976421483393e-06, "loss": 1.2533, "step": 513 }, { "epoch": 0.039623805118717235, "grad_norm": 4.078352451324463, "learning_rate": 9.997603656513295e-06, "loss": 1.179, "step": 514 }, { "epoch": 0.0397008942337342, "grad_norm": 4.232064247131348, "learning_rate": 9.997564853112586e-06, "loss": 1.2968, "step": 515 }, { "epoch": 0.039777983348751156, "grad_norm": 4.38081169128418, "learning_rate": 9.997525738139595e-06, "loss": 1.1834, "step": 516 }, { "epoch": 0.03985507246376811, "grad_norm": 4.6666388511657715, "learning_rate": 9.997486311596754e-06, "loss": 1.16, "step": 517 }, { "epoch": 0.039932161578785076, "grad_norm": 4.461724758148193, "learning_rate": 9.997446573486524e-06, "loss": 1.2314, "step": 518 }, { "epoch": 0.04000925069380203, "grad_norm": 4.663398265838623, "learning_rate": 9.997406523811383e-06, "loss": 1.1742, "step": 519 }, { "epoch": 0.040086339808818996, "grad_norm": 4.144864559173584, "learning_rate": 9.997366162573828e-06, "loss": 1.1362, "step": 520 }, { "epoch": 0.04016342892383595, "grad_norm": 4.398372650146484, "learning_rate": 9.997325489776375e-06, "loss": 1.1925, "step": 521 }, { "epoch": 0.04024051803885292, "grad_norm": 4.672181606292725, "learning_rate": 9.997284505421559e-06, "loss": 1.2667, "step": 522 }, { "epoch": 0.040317607153869874, "grad_norm": 4.4285569190979, "learning_rate": 9.997243209511935e-06, "loss": 1.3158, "step": 
523 }, { "epoch": 0.04039469626888683, "grad_norm": 4.436081886291504, "learning_rate": 9.99720160205008e-06, "loss": 1.1041, "step": 524 }, { "epoch": 0.040471785383903794, "grad_norm": 4.347367286682129, "learning_rate": 9.997159683038584e-06, "loss": 1.1595, "step": 525 }, { "epoch": 0.04054887449892075, "grad_norm": 4.27985143661499, "learning_rate": 9.997117452480063e-06, "loss": 1.283, "step": 526 }, { "epoch": 0.040625963613937714, "grad_norm": 4.468918800354004, "learning_rate": 9.997074910377151e-06, "loss": 1.1987, "step": 527 }, { "epoch": 0.04070305272895467, "grad_norm": 4.384061813354492, "learning_rate": 9.997032056732498e-06, "loss": 1.1361, "step": 528 }, { "epoch": 0.040780141843971635, "grad_norm": 4.80853271484375, "learning_rate": 9.996988891548777e-06, "loss": 1.2749, "step": 529 }, { "epoch": 0.04085723095898859, "grad_norm": 4.914130210876465, "learning_rate": 9.99694541482868e-06, "loss": 1.4303, "step": 530 }, { "epoch": 0.04093432007400555, "grad_norm": 4.093295097351074, "learning_rate": 9.996901626574915e-06, "loss": 1.155, "step": 531 }, { "epoch": 0.04101140918902251, "grad_norm": 4.31951379776001, "learning_rate": 9.996857526790212e-06, "loss": 1.206, "step": 532 }, { "epoch": 0.04108849830403947, "grad_norm": 6.6202521324157715, "learning_rate": 9.996813115477323e-06, "loss": 1.1679, "step": 533 }, { "epoch": 0.04116558741905643, "grad_norm": 4.6812334060668945, "learning_rate": 9.996768392639015e-06, "loss": 1.205, "step": 534 }, { "epoch": 0.04124267653407339, "grad_norm": 4.417919158935547, "learning_rate": 9.996723358278077e-06, "loss": 1.2173, "step": 535 }, { "epoch": 0.041319765649090345, "grad_norm": 4.809331893920898, "learning_rate": 9.996678012397317e-06, "loss": 1.2465, "step": 536 }, { "epoch": 0.04139685476410731, "grad_norm": 4.317751884460449, "learning_rate": 9.99663235499956e-06, "loss": 1.1406, "step": 537 }, { "epoch": 0.041473943879124266, "grad_norm": 4.429257392883301, "learning_rate": 9.996586386087653e-06, "loss": 1.2509, "step": 538 }, { "epoch": 0.04155103299414123, "grad_norm": 4.199450969696045, "learning_rate": 9.996540105664464e-06, "loss": 1.1988, "step": 539 }, { "epoch": 0.041628122109158186, "grad_norm": 4.068055152893066, "learning_rate": 9.996493513732878e-06, "loss": 1.212, "step": 540 }, { "epoch": 0.04170521122417515, "grad_norm": 4.471511363983154, "learning_rate": 9.996446610295797e-06, "loss": 1.2122, "step": 541 }, { "epoch": 0.041782300339192106, "grad_norm": 4.128568172454834, "learning_rate": 9.996399395356149e-06, "loss": 1.2008, "step": 542 }, { "epoch": 0.04185938945420906, "grad_norm": 4.495331764221191, "learning_rate": 9.996351868916874e-06, "loss": 1.2308, "step": 543 }, { "epoch": 0.04193647856922603, "grad_norm": 4.763047218322754, "learning_rate": 9.996304030980939e-06, "loss": 1.2247, "step": 544 }, { "epoch": 0.04201356768424298, "grad_norm": 4.249602317810059, "learning_rate": 9.996255881551322e-06, "loss": 1.2335, "step": 545 }, { "epoch": 0.04209065679925995, "grad_norm": 4.013154983520508, "learning_rate": 9.996207420631029e-06, "loss": 1.0859, "step": 546 }, { "epoch": 0.042167745914276904, "grad_norm": 4.579375743865967, "learning_rate": 9.996158648223077e-06, "loss": 1.1609, "step": 547 }, { "epoch": 0.04224483502929386, "grad_norm": 4.4072747230529785, "learning_rate": 9.996109564330513e-06, "loss": 1.2142, "step": 548 }, { "epoch": 0.042321924144310824, "grad_norm": 5.151735782623291, "learning_rate": 9.99606016895639e-06, "loss": 1.2514, "step": 549 }, { "epoch": 0.04239901325932778, 
"grad_norm": 4.274787902832031, "learning_rate": 9.996010462103791e-06, "loss": 1.087, "step": 550 }, { "epoch": 0.042476102374344744, "grad_norm": 4.291683197021484, "learning_rate": 9.995960443775814e-06, "loss": 1.0762, "step": 551 }, { "epoch": 0.0425531914893617, "grad_norm": 4.732009410858154, "learning_rate": 9.99591011397558e-06, "loss": 1.2677, "step": 552 }, { "epoch": 0.042630280604378665, "grad_norm": 4.424540996551514, "learning_rate": 9.995859472706223e-06, "loss": 1.3005, "step": 553 }, { "epoch": 0.04270736971939562, "grad_norm": 4.752233505249023, "learning_rate": 9.995808519970902e-06, "loss": 1.278, "step": 554 }, { "epoch": 0.04278445883441258, "grad_norm": 3.9832558631896973, "learning_rate": 9.995757255772795e-06, "loss": 1.1136, "step": 555 }, { "epoch": 0.04286154794942954, "grad_norm": 4.465639114379883, "learning_rate": 9.995705680115098e-06, "loss": 1.1282, "step": 556 }, { "epoch": 0.0429386370644465, "grad_norm": 4.6259446144104, "learning_rate": 9.995653793001023e-06, "loss": 1.2864, "step": 557 }, { "epoch": 0.04301572617946346, "grad_norm": 3.95756459236145, "learning_rate": 9.995601594433808e-06, "loss": 1.1661, "step": 558 }, { "epoch": 0.04309281529448042, "grad_norm": 4.663955211639404, "learning_rate": 9.995549084416706e-06, "loss": 1.1438, "step": 559 }, { "epoch": 0.043169904409497376, "grad_norm": 4.4860615730285645, "learning_rate": 9.995496262952993e-06, "loss": 1.3304, "step": 560 }, { "epoch": 0.04324699352451434, "grad_norm": 4.189501762390137, "learning_rate": 9.995443130045958e-06, "loss": 1.1138, "step": 561 }, { "epoch": 0.043324082639531296, "grad_norm": 4.266587734222412, "learning_rate": 9.995389685698918e-06, "loss": 1.1725, "step": 562 }, { "epoch": 0.04340117175454826, "grad_norm": 4.076508045196533, "learning_rate": 9.995335929915202e-06, "loss": 1.2068, "step": 563 }, { "epoch": 0.043478260869565216, "grad_norm": 4.271369934082031, "learning_rate": 9.995281862698161e-06, "loss": 1.2824, "step": 564 }, { "epoch": 0.04355534998458218, "grad_norm": 4.289413928985596, "learning_rate": 9.995227484051168e-06, "loss": 1.1504, "step": 565 }, { "epoch": 0.043632439099599137, "grad_norm": 4.292287826538086, "learning_rate": 9.995172793977613e-06, "loss": 1.1755, "step": 566 }, { "epoch": 0.04370952821461609, "grad_norm": 4.293979167938232, "learning_rate": 9.995117792480903e-06, "loss": 1.1852, "step": 567 }, { "epoch": 0.04378661732963306, "grad_norm": 4.438719272613525, "learning_rate": 9.995062479564472e-06, "loss": 1.2362, "step": 568 }, { "epoch": 0.043863706444650014, "grad_norm": 4.045646667480469, "learning_rate": 9.995006855231763e-06, "loss": 1.1894, "step": 569 }, { "epoch": 0.04394079555966698, "grad_norm": 4.521982669830322, "learning_rate": 9.994950919486248e-06, "loss": 1.2075, "step": 570 }, { "epoch": 0.044017884674683934, "grad_norm": 4.346794605255127, "learning_rate": 9.994894672331413e-06, "loss": 1.0808, "step": 571 }, { "epoch": 0.0440949737897009, "grad_norm": 3.817455291748047, "learning_rate": 9.994838113770762e-06, "loss": 1.1479, "step": 572 }, { "epoch": 0.044172062904717854, "grad_norm": 4.4810566902160645, "learning_rate": 9.994781243807825e-06, "loss": 1.1589, "step": 573 }, { "epoch": 0.04424915201973481, "grad_norm": 4.639770030975342, "learning_rate": 9.994724062446145e-06, "loss": 1.2405, "step": 574 }, { "epoch": 0.044326241134751775, "grad_norm": 3.9520983695983887, "learning_rate": 9.994666569689291e-06, "loss": 1.106, "step": 575 }, { "epoch": 0.04440333024976873, "grad_norm": 4.4173407554626465, 
"learning_rate": 9.994608765540842e-06, "loss": 1.1598, "step": 576 }, { "epoch": 0.044480419364785695, "grad_norm": 3.9496123790740967, "learning_rate": 9.994550650004404e-06, "loss": 1.2553, "step": 577 }, { "epoch": 0.04455750847980265, "grad_norm": 4.36904239654541, "learning_rate": 9.994492223083603e-06, "loss": 1.2274, "step": 578 }, { "epoch": 0.04463459759481961, "grad_norm": 4.925714015960693, "learning_rate": 9.994433484782076e-06, "loss": 1.3003, "step": 579 }, { "epoch": 0.04471168670983657, "grad_norm": 4.471517086029053, "learning_rate": 9.994374435103489e-06, "loss": 1.3164, "step": 580 }, { "epoch": 0.04478877582485353, "grad_norm": 4.279041290283203, "learning_rate": 9.994315074051525e-06, "loss": 1.1954, "step": 581 }, { "epoch": 0.04486586493987049, "grad_norm": 4.745578289031982, "learning_rate": 9.994255401629878e-06, "loss": 1.2623, "step": 582 }, { "epoch": 0.04494295405488745, "grad_norm": 4.342554569244385, "learning_rate": 9.994195417842276e-06, "loss": 1.3354, "step": 583 }, { "epoch": 0.04502004316990441, "grad_norm": 4.38990592956543, "learning_rate": 9.994135122692454e-06, "loss": 1.2993, "step": 584 }, { "epoch": 0.04509713228492137, "grad_norm": 4.334774971008301, "learning_rate": 9.99407451618417e-06, "loss": 1.2366, "step": 585 }, { "epoch": 0.045174221399938326, "grad_norm": 4.226545333862305, "learning_rate": 9.994013598321208e-06, "loss": 1.2286, "step": 586 }, { "epoch": 0.04525131051495529, "grad_norm": 4.3245391845703125, "learning_rate": 9.993952369107363e-06, "loss": 1.2417, "step": 587 }, { "epoch": 0.045328399629972246, "grad_norm": 4.576898097991943, "learning_rate": 9.99389082854645e-06, "loss": 1.3929, "step": 588 }, { "epoch": 0.04540548874498921, "grad_norm": 4.47149658203125, "learning_rate": 9.993828976642311e-06, "loss": 1.2802, "step": 589 }, { "epoch": 0.04548257786000617, "grad_norm": 4.014610290527344, "learning_rate": 9.993766813398796e-06, "loss": 1.1964, "step": 590 }, { "epoch": 0.04555966697502312, "grad_norm": 4.17547607421875, "learning_rate": 9.993704338819786e-06, "loss": 1.2604, "step": 591 }, { "epoch": 0.04563675609004009, "grad_norm": 4.3998823165893555, "learning_rate": 9.993641552909172e-06, "loss": 1.2443, "step": 592 }, { "epoch": 0.045713845205057044, "grad_norm": 4.851663589477539, "learning_rate": 9.99357845567087e-06, "loss": 1.3904, "step": 593 }, { "epoch": 0.04579093432007401, "grad_norm": 4.441982746124268, "learning_rate": 9.993515047108813e-06, "loss": 1.2615, "step": 594 }, { "epoch": 0.045868023435090964, "grad_norm": 4.005769729614258, "learning_rate": 9.993451327226955e-06, "loss": 1.1988, "step": 595 }, { "epoch": 0.04594511255010793, "grad_norm": 4.6932454109191895, "learning_rate": 9.99338729602927e-06, "loss": 1.1599, "step": 596 }, { "epoch": 0.046022201665124884, "grad_norm": 4.53994083404541, "learning_rate": 9.993322953519749e-06, "loss": 1.1581, "step": 597 }, { "epoch": 0.04609929078014184, "grad_norm": 4.505131721496582, "learning_rate": 9.9932582997024e-06, "loss": 1.2383, "step": 598 }, { "epoch": 0.046176379895158805, "grad_norm": 4.228984355926514, "learning_rate": 9.993193334581259e-06, "loss": 1.3035, "step": 599 }, { "epoch": 0.04625346901017576, "grad_norm": 4.068345546722412, "learning_rate": 9.993128058160373e-06, "loss": 1.1668, "step": 600 }, { "epoch": 0.046330558125192725, "grad_norm": 4.136154651641846, "learning_rate": 9.993062470443814e-06, "loss": 1.2611, "step": 601 }, { "epoch": 0.04640764724020968, "grad_norm": 3.893854856491089, "learning_rate": 9.992996571435668e-06, 
"loss": 1.0874, "step": 602 }, { "epoch": 0.046484736355226645, "grad_norm": 3.9865612983703613, "learning_rate": 9.992930361140045e-06, "loss": 1.1812, "step": 603 }, { "epoch": 0.0465618254702436, "grad_norm": 4.678125381469727, "learning_rate": 9.992863839561076e-06, "loss": 1.2949, "step": 604 }, { "epoch": 0.04663891458526056, "grad_norm": 4.224588871002197, "learning_rate": 9.992797006702902e-06, "loss": 1.1542, "step": 605 }, { "epoch": 0.04671600370027752, "grad_norm": 4.713046073913574, "learning_rate": 9.992729862569694e-06, "loss": 1.2428, "step": 606 }, { "epoch": 0.04679309281529448, "grad_norm": 4.193148612976074, "learning_rate": 9.992662407165637e-06, "loss": 1.2829, "step": 607 }, { "epoch": 0.04687018193031144, "grad_norm": 4.588696479797363, "learning_rate": 9.992594640494937e-06, "loss": 1.3168, "step": 608 }, { "epoch": 0.0469472710453284, "grad_norm": 3.988802671432495, "learning_rate": 9.99252656256182e-06, "loss": 1.1058, "step": 609 }, { "epoch": 0.047024360160345356, "grad_norm": 4.400096893310547, "learning_rate": 9.992458173370525e-06, "loss": 1.3421, "step": 610 }, { "epoch": 0.04710144927536232, "grad_norm": 4.6701483726501465, "learning_rate": 9.992389472925323e-06, "loss": 1.2688, "step": 611 }, { "epoch": 0.04717853839037928, "grad_norm": 4.002415180206299, "learning_rate": 9.99232046123049e-06, "loss": 1.163, "step": 612 }, { "epoch": 0.04725562750539624, "grad_norm": 4.165858268737793, "learning_rate": 9.992251138290336e-06, "loss": 1.157, "step": 613 }, { "epoch": 0.0473327166204132, "grad_norm": 4.390151023864746, "learning_rate": 9.992181504109177e-06, "loss": 1.147, "step": 614 }, { "epoch": 0.04740980573543016, "grad_norm": 4.475891590118408, "learning_rate": 9.992111558691357e-06, "loss": 1.1861, "step": 615 }, { "epoch": 0.04748689485044712, "grad_norm": 4.247600555419922, "learning_rate": 9.992041302041238e-06, "loss": 1.0589, "step": 616 }, { "epoch": 0.047563983965464074, "grad_norm": 4.422875881195068, "learning_rate": 9.991970734163195e-06, "loss": 1.1923, "step": 617 }, { "epoch": 0.04764107308048104, "grad_norm": 4.901252746582031, "learning_rate": 9.991899855061633e-06, "loss": 1.1754, "step": 618 }, { "epoch": 0.047718162195497994, "grad_norm": 4.342195510864258, "learning_rate": 9.991828664740969e-06, "loss": 1.1996, "step": 619 }, { "epoch": 0.04779525131051496, "grad_norm": 4.116771697998047, "learning_rate": 9.991757163205638e-06, "loss": 1.1596, "step": 620 }, { "epoch": 0.047872340425531915, "grad_norm": 4.246543884277344, "learning_rate": 9.991685350460102e-06, "loss": 1.2953, "step": 621 }, { "epoch": 0.04794942954054887, "grad_norm": 4.528024673461914, "learning_rate": 9.991613226508838e-06, "loss": 1.1836, "step": 622 }, { "epoch": 0.048026518655565835, "grad_norm": 4.674347877502441, "learning_rate": 9.991540791356342e-06, "loss": 1.2399, "step": 623 }, { "epoch": 0.04810360777058279, "grad_norm": 3.99076247215271, "learning_rate": 9.99146804500713e-06, "loss": 1.1949, "step": 624 }, { "epoch": 0.048180696885599755, "grad_norm": 4.242091178894043, "learning_rate": 9.991394987465734e-06, "loss": 1.2284, "step": 625 }, { "epoch": 0.04825778600061671, "grad_norm": 4.19851016998291, "learning_rate": 9.991321618736713e-06, "loss": 1.2451, "step": 626 }, { "epoch": 0.048334875115633676, "grad_norm": 4.685157299041748, "learning_rate": 9.991247938824641e-06, "loss": 1.1317, "step": 627 }, { "epoch": 0.04841196423065063, "grad_norm": 3.7727530002593994, "learning_rate": 9.991173947734109e-06, "loss": 1.1234, "step": 628 }, { "epoch": 
0.04848905334566759, "grad_norm": 4.488282203674316, "learning_rate": 9.991099645469733e-06, "loss": 1.1491, "step": 629 }, { "epoch": 0.04856614246068455, "grad_norm": 4.348680019378662, "learning_rate": 9.991025032036141e-06, "loss": 1.273, "step": 630 }, { "epoch": 0.04864323157570151, "grad_norm": 4.021731853485107, "learning_rate": 9.990950107437989e-06, "loss": 1.092, "step": 631 }, { "epoch": 0.04872032069071847, "grad_norm": 3.9224579334259033, "learning_rate": 9.990874871679948e-06, "loss": 1.1298, "step": 632 }, { "epoch": 0.04879740980573543, "grad_norm": 4.388615608215332, "learning_rate": 9.990799324766704e-06, "loss": 1.3351, "step": 633 }, { "epoch": 0.048874498920752386, "grad_norm": 4.323206901550293, "learning_rate": 9.990723466702972e-06, "loss": 1.3192, "step": 634 }, { "epoch": 0.04895158803576935, "grad_norm": 4.259064674377441, "learning_rate": 9.99064729749348e-06, "loss": 1.2824, "step": 635 }, { "epoch": 0.04902867715078631, "grad_norm": 4.554323673248291, "learning_rate": 9.990570817142974e-06, "loss": 1.302, "step": 636 }, { "epoch": 0.04910576626580327, "grad_norm": 4.136668682098389, "learning_rate": 9.990494025656227e-06, "loss": 1.2416, "step": 637 }, { "epoch": 0.04918285538082023, "grad_norm": 4.030932426452637, "learning_rate": 9.990416923038022e-06, "loss": 1.2393, "step": 638 }, { "epoch": 0.04925994449583719, "grad_norm": 4.34704065322876, "learning_rate": 9.990339509293169e-06, "loss": 1.175, "step": 639 }, { "epoch": 0.04933703361085415, "grad_norm": 4.10532808303833, "learning_rate": 9.990261784426494e-06, "loss": 1.0991, "step": 640 }, { "epoch": 0.049414122725871104, "grad_norm": 4.257048606872559, "learning_rate": 9.99018374844284e-06, "loss": 1.2731, "step": 641 }, { "epoch": 0.04949121184088807, "grad_norm": 4.365378379821777, "learning_rate": 9.990105401347075e-06, "loss": 1.1691, "step": 642 }, { "epoch": 0.049568300955905024, "grad_norm": 5.146248817443848, "learning_rate": 9.990026743144084e-06, "loss": 1.1753, "step": 643 }, { "epoch": 0.04964539007092199, "grad_norm": 4.770695686340332, "learning_rate": 9.989947773838766e-06, "loss": 1.3062, "step": 644 }, { "epoch": 0.049722479185938945, "grad_norm": 4.065369606018066, "learning_rate": 9.989868493436052e-06, "loss": 1.1505, "step": 645 }, { "epoch": 0.04979956830095591, "grad_norm": 3.940561294555664, "learning_rate": 9.989788901940878e-06, "loss": 1.1703, "step": 646 }, { "epoch": 0.049876657415972865, "grad_norm": 4.689897537231445, "learning_rate": 9.98970899935821e-06, "loss": 1.2645, "step": 647 }, { "epoch": 0.04995374653098982, "grad_norm": 3.9842276573181152, "learning_rate": 9.989628785693027e-06, "loss": 1.2856, "step": 648 }, { "epoch": 0.050030835646006785, "grad_norm": 4.0935773849487305, "learning_rate": 9.989548260950332e-06, "loss": 1.2049, "step": 649 }, { "epoch": 0.05010792476102374, "grad_norm": 4.526483058929443, "learning_rate": 9.989467425135145e-06, "loss": 1.2788, "step": 650 }, { "epoch": 0.050185013876040706, "grad_norm": 4.23220682144165, "learning_rate": 9.989386278252505e-06, "loss": 1.2215, "step": 651 }, { "epoch": 0.05026210299105766, "grad_norm": 4.511578559875488, "learning_rate": 9.989304820307469e-06, "loss": 1.2454, "step": 652 }, { "epoch": 0.05033919210607462, "grad_norm": 3.9522716999053955, "learning_rate": 9.98922305130512e-06, "loss": 1.1437, "step": 653 }, { "epoch": 0.05041628122109158, "grad_norm": 4.068902492523193, "learning_rate": 9.989140971250553e-06, "loss": 1.1963, "step": 654 }, { "epoch": 0.05049337033610854, "grad_norm": 
4.930382251739502, "learning_rate": 9.989058580148885e-06, "loss": 1.2457, "step": 655 }, { "epoch": 0.0505704594511255, "grad_norm": 3.9273664951324463, "learning_rate": 9.988975878005256e-06, "loss": 1.1408, "step": 656 }, { "epoch": 0.05064754856614246, "grad_norm": 4.123290061950684, "learning_rate": 9.988892864824817e-06, "loss": 1.2368, "step": 657 }, { "epoch": 0.050724637681159424, "grad_norm": 3.8407251834869385, "learning_rate": 9.988809540612747e-06, "loss": 1.2847, "step": 658 }, { "epoch": 0.05080172679617638, "grad_norm": 4.222978115081787, "learning_rate": 9.988725905374242e-06, "loss": 1.1422, "step": 659 }, { "epoch": 0.05087881591119334, "grad_norm": 4.184953689575195, "learning_rate": 9.988641959114512e-06, "loss": 1.1177, "step": 660 }, { "epoch": 0.0509559050262103, "grad_norm": 4.532317161560059, "learning_rate": 9.988557701838791e-06, "loss": 1.2804, "step": 661 }, { "epoch": 0.05103299414122726, "grad_norm": 4.091014385223389, "learning_rate": 9.988473133552336e-06, "loss": 1.1743, "step": 662 }, { "epoch": 0.05111008325624422, "grad_norm": 4.535579204559326, "learning_rate": 9.988388254260417e-06, "loss": 1.161, "step": 663 }, { "epoch": 0.05118717237126118, "grad_norm": 4.38757848739624, "learning_rate": 9.988303063968325e-06, "loss": 1.2166, "step": 664 }, { "epoch": 0.051264261486278134, "grad_norm": 4.727634906768799, "learning_rate": 9.988217562681373e-06, "loss": 1.1831, "step": 665 }, { "epoch": 0.0513413506012951, "grad_norm": 4.283156871795654, "learning_rate": 9.988131750404889e-06, "loss": 1.2051, "step": 666 }, { "epoch": 0.051418439716312055, "grad_norm": 4.735593318939209, "learning_rate": 9.988045627144226e-06, "loss": 1.2458, "step": 667 }, { "epoch": 0.05149552883132902, "grad_norm": 3.9615490436553955, "learning_rate": 9.98795919290475e-06, "loss": 1.3039, "step": 668 }, { "epoch": 0.051572617946345975, "grad_norm": 4.121702671051025, "learning_rate": 9.987872447691853e-06, "loss": 1.1451, "step": 669 }, { "epoch": 0.05164970706136294, "grad_norm": 4.138181209564209, "learning_rate": 9.987785391510943e-06, "loss": 1.2745, "step": 670 }, { "epoch": 0.051726796176379895, "grad_norm": 4.431112289428711, "learning_rate": 9.987698024367444e-06, "loss": 1.2789, "step": 671 }, { "epoch": 0.05180388529139685, "grad_norm": 4.268251895904541, "learning_rate": 9.987610346266808e-06, "loss": 1.128, "step": 672 }, { "epoch": 0.051880974406413816, "grad_norm": 4.024346351623535, "learning_rate": 9.987522357214496e-06, "loss": 1.1851, "step": 673 }, { "epoch": 0.05195806352143077, "grad_norm": 4.691232681274414, "learning_rate": 9.987434057215996e-06, "loss": 1.1774, "step": 674 }, { "epoch": 0.052035152636447736, "grad_norm": 4.2596635818481445, "learning_rate": 9.987345446276815e-06, "loss": 1.1782, "step": 675 }, { "epoch": 0.05211224175146469, "grad_norm": 4.4914231300354, "learning_rate": 9.987256524402473e-06, "loss": 1.3086, "step": 676 }, { "epoch": 0.05218933086648165, "grad_norm": 4.257656097412109, "learning_rate": 9.987167291598518e-06, "loss": 1.2465, "step": 677 }, { "epoch": 0.05226641998149861, "grad_norm": 4.148374080657959, "learning_rate": 9.987077747870512e-06, "loss": 1.2106, "step": 678 }, { "epoch": 0.05234350909651557, "grad_norm": 4.15828275680542, "learning_rate": 9.986987893224036e-06, "loss": 1.2535, "step": 679 }, { "epoch": 0.05242059821153253, "grad_norm": 4.436988353729248, "learning_rate": 9.986897727664693e-06, "loss": 1.2644, "step": 680 }, { "epoch": 0.05249768732654949, "grad_norm": 4.664417266845703, "learning_rate": 
9.986807251198105e-06, "loss": 1.3725, "step": 681 }, { "epoch": 0.052574776441566454, "grad_norm": 4.027350425720215, "learning_rate": 9.986716463829913e-06, "loss": 1.2051, "step": 682 }, { "epoch": 0.05265186555658341, "grad_norm": 4.5459818840026855, "learning_rate": 9.986625365565776e-06, "loss": 1.3047, "step": 683 }, { "epoch": 0.05272895467160037, "grad_norm": 4.167656898498535, "learning_rate": 9.986533956411373e-06, "loss": 1.1959, "step": 684 }, { "epoch": 0.05280604378661733, "grad_norm": 4.064361095428467, "learning_rate": 9.986442236372404e-06, "loss": 1.1627, "step": 685 }, { "epoch": 0.05288313290163429, "grad_norm": 4.358853816986084, "learning_rate": 9.986350205454587e-06, "loss": 1.1343, "step": 686 }, { "epoch": 0.05296022201665125, "grad_norm": 3.9868597984313965, "learning_rate": 9.98625786366366e-06, "loss": 1.1268, "step": 687 }, { "epoch": 0.05303731113166821, "grad_norm": 3.7771265506744385, "learning_rate": 9.98616521100538e-06, "loss": 1.1284, "step": 688 }, { "epoch": 0.05311440024668517, "grad_norm": 4.3471574783325195, "learning_rate": 9.986072247485523e-06, "loss": 1.2269, "step": 689 }, { "epoch": 0.05319148936170213, "grad_norm": 3.956973075866699, "learning_rate": 9.985978973109885e-06, "loss": 1.2157, "step": 690 }, { "epoch": 0.053268578476719085, "grad_norm": 4.183494567871094, "learning_rate": 9.98588538788428e-06, "loss": 1.2887, "step": 691 }, { "epoch": 0.05334566759173605, "grad_norm": 4.123586654663086, "learning_rate": 9.985791491814545e-06, "loss": 1.1641, "step": 692 }, { "epoch": 0.053422756706753005, "grad_norm": 4.9690680503845215, "learning_rate": 9.985697284906532e-06, "loss": 1.221, "step": 693 }, { "epoch": 0.05349984582176997, "grad_norm": 3.8592402935028076, "learning_rate": 9.985602767166114e-06, "loss": 1.2379, "step": 694 }, { "epoch": 0.053576934936786925, "grad_norm": 4.504653453826904, "learning_rate": 9.985507938599186e-06, "loss": 1.2235, "step": 695 }, { "epoch": 0.05365402405180388, "grad_norm": 4.732753753662109, "learning_rate": 9.985412799211658e-06, "loss": 1.2707, "step": 696 }, { "epoch": 0.053731113166820846, "grad_norm": 3.8671162128448486, "learning_rate": 9.98531734900946e-06, "loss": 1.282, "step": 697 }, { "epoch": 0.0538082022818378, "grad_norm": 4.004355430603027, "learning_rate": 9.985221587998549e-06, "loss": 1.1439, "step": 698 }, { "epoch": 0.053885291396854766, "grad_norm": 4.045041084289551, "learning_rate": 9.985125516184889e-06, "loss": 1.1859, "step": 699 }, { "epoch": 0.05396238051187172, "grad_norm": 4.078817367553711, "learning_rate": 9.985029133574471e-06, "loss": 1.231, "step": 700 }, { "epoch": 0.054039469626888686, "grad_norm": 4.425305366516113, "learning_rate": 9.984932440173304e-06, "loss": 1.2619, "step": 701 }, { "epoch": 0.05411655874190564, "grad_norm": 4.202983856201172, "learning_rate": 9.98483543598742e-06, "loss": 1.1445, "step": 702 }, { "epoch": 0.0541936478569226, "grad_norm": 4.361485958099365, "learning_rate": 9.984738121022861e-06, "loss": 1.2118, "step": 703 }, { "epoch": 0.054270736971939564, "grad_norm": 4.436924934387207, "learning_rate": 9.9846404952857e-06, "loss": 1.3065, "step": 704 }, { "epoch": 0.05434782608695652, "grad_norm": 4.535494804382324, "learning_rate": 9.984542558782018e-06, "loss": 1.2161, "step": 705 }, { "epoch": 0.054424915201973484, "grad_norm": 4.041503429412842, "learning_rate": 9.984444311517923e-06, "loss": 1.2332, "step": 706 }, { "epoch": 0.05450200431699044, "grad_norm": 4.140787601470947, "learning_rate": 9.98434575349954e-06, "loss": 1.2384, 
"step": 707 }, { "epoch": 0.0545790934320074, "grad_norm": 4.458608627319336, "learning_rate": 9.984246884733014e-06, "loss": 1.1531, "step": 708 }, { "epoch": 0.05465618254702436, "grad_norm": 4.457377910614014, "learning_rate": 9.984147705224509e-06, "loss": 1.1315, "step": 709 }, { "epoch": 0.05473327166204132, "grad_norm": 4.368088722229004, "learning_rate": 9.984048214980209e-06, "loss": 1.2896, "step": 710 }, { "epoch": 0.05481036077705828, "grad_norm": 4.405154228210449, "learning_rate": 9.983948414006315e-06, "loss": 1.2627, "step": 711 }, { "epoch": 0.05488744989207524, "grad_norm": 4.391892433166504, "learning_rate": 9.98384830230905e-06, "loss": 1.2523, "step": 712 }, { "epoch": 0.0549645390070922, "grad_norm": 4.066841125488281, "learning_rate": 9.983747879894655e-06, "loss": 1.3027, "step": 713 }, { "epoch": 0.05504162812210916, "grad_norm": 3.8345208168029785, "learning_rate": 9.98364714676939e-06, "loss": 1.2225, "step": 714 }, { "epoch": 0.055118717237126115, "grad_norm": 3.9913508892059326, "learning_rate": 9.983546102939539e-06, "loss": 1.2512, "step": 715 }, { "epoch": 0.05519580635214308, "grad_norm": 4.175078868865967, "learning_rate": 9.983444748411399e-06, "loss": 1.092, "step": 716 }, { "epoch": 0.055272895467160035, "grad_norm": 4.498879909515381, "learning_rate": 9.983343083191287e-06, "loss": 1.2141, "step": 717 }, { "epoch": 0.055349984582177, "grad_norm": 4.295947551727295, "learning_rate": 9.983241107285544e-06, "loss": 1.2172, "step": 718 }, { "epoch": 0.055427073697193956, "grad_norm": 4.796656131744385, "learning_rate": 9.983138820700526e-06, "loss": 1.0474, "step": 719 }, { "epoch": 0.05550416281221091, "grad_norm": 4.575197219848633, "learning_rate": 9.983036223442612e-06, "loss": 1.2008, "step": 720 }, { "epoch": 0.055581251927227876, "grad_norm": 4.701834201812744, "learning_rate": 9.982933315518197e-06, "loss": 1.3672, "step": 721 }, { "epoch": 0.05565834104224483, "grad_norm": 4.1152753829956055, "learning_rate": 9.982830096933697e-06, "loss": 1.1963, "step": 722 }, { "epoch": 0.055735430157261796, "grad_norm": 3.986194372177124, "learning_rate": 9.982726567695547e-06, "loss": 1.2208, "step": 723 }, { "epoch": 0.05581251927227875, "grad_norm": 4.519706726074219, "learning_rate": 9.982622727810202e-06, "loss": 1.2464, "step": 724 }, { "epoch": 0.05588960838729572, "grad_norm": 4.6598992347717285, "learning_rate": 9.982518577284135e-06, "loss": 1.2623, "step": 725 }, { "epoch": 0.05596669750231267, "grad_norm": 4.343133926391602, "learning_rate": 9.98241411612384e-06, "loss": 1.302, "step": 726 }, { "epoch": 0.05604378661732963, "grad_norm": 4.320357322692871, "learning_rate": 9.98230934433583e-06, "loss": 1.2826, "step": 727 }, { "epoch": 0.056120875732346594, "grad_norm": 3.914271116256714, "learning_rate": 9.982204261926636e-06, "loss": 1.0489, "step": 728 }, { "epoch": 0.05619796484736355, "grad_norm": 4.293032169342041, "learning_rate": 9.98209886890281e-06, "loss": 1.0239, "step": 729 }, { "epoch": 0.056275053962380514, "grad_norm": 4.971100807189941, "learning_rate": 9.981993165270922e-06, "loss": 1.3301, "step": 730 }, { "epoch": 0.05635214307739747, "grad_norm": 4.393875598907471, "learning_rate": 9.981887151037563e-06, "loss": 1.1637, "step": 731 }, { "epoch": 0.056429232192414434, "grad_norm": 4.458146095275879, "learning_rate": 9.981780826209342e-06, "loss": 1.2018, "step": 732 }, { "epoch": 0.05650632130743139, "grad_norm": 4.671881198883057, "learning_rate": 9.981674190792887e-06, "loss": 1.2186, "step": 733 }, { "epoch": 
0.05658341042244835, "grad_norm": 4.1135029792785645, "learning_rate": 9.98156724479485e-06, "loss": 1.2273, "step": 734 }, { "epoch": 0.05666049953746531, "grad_norm": 4.136716842651367, "learning_rate": 9.981459988221891e-06, "loss": 1.2256, "step": 735 }, { "epoch": 0.05673758865248227, "grad_norm": 4.991126537322998, "learning_rate": 9.981352421080704e-06, "loss": 1.1702, "step": 736 }, { "epoch": 0.05681467776749923, "grad_norm": 3.9331271648406982, "learning_rate": 9.981244543377992e-06, "loss": 1.0802, "step": 737 }, { "epoch": 0.05689176688251619, "grad_norm": 4.251241207122803, "learning_rate": 9.981136355120483e-06, "loss": 1.2407, "step": 738 }, { "epoch": 0.056968855997533145, "grad_norm": 4.525993347167969, "learning_rate": 9.981027856314918e-06, "loss": 1.1908, "step": 739 }, { "epoch": 0.05704594511255011, "grad_norm": 4.329502105712891, "learning_rate": 9.980919046968063e-06, "loss": 1.3154, "step": 740 }, { "epoch": 0.057123034227567066, "grad_norm": 4.49136209487915, "learning_rate": 9.980809927086704e-06, "loss": 1.3037, "step": 741 }, { "epoch": 0.05720012334258403, "grad_norm": 4.0258941650390625, "learning_rate": 9.980700496677643e-06, "loss": 1.1127, "step": 742 }, { "epoch": 0.057277212457600986, "grad_norm": 4.217193126678467, "learning_rate": 9.980590755747698e-06, "loss": 1.1301, "step": 743 }, { "epoch": 0.05735430157261795, "grad_norm": 4.262319087982178, "learning_rate": 9.980480704303719e-06, "loss": 1.2313, "step": 744 }, { "epoch": 0.057431390687634906, "grad_norm": 4.561182975769043, "learning_rate": 9.980370342352562e-06, "loss": 1.2237, "step": 745 }, { "epoch": 0.05750847980265186, "grad_norm": 4.379204750061035, "learning_rate": 9.980259669901105e-06, "loss": 1.2885, "step": 746 }, { "epoch": 0.057585568917668827, "grad_norm": 4.246951103210449, "learning_rate": 9.980148686956252e-06, "loss": 1.2487, "step": 747 }, { "epoch": 0.05766265803268578, "grad_norm": 4.429943084716797, "learning_rate": 9.980037393524925e-06, "loss": 1.2902, "step": 748 }, { "epoch": 0.05773974714770275, "grad_norm": 4.189937114715576, "learning_rate": 9.979925789614053e-06, "loss": 1.1855, "step": 749 }, { "epoch": 0.057816836262719704, "grad_norm": 4.239128589630127, "learning_rate": 9.979813875230604e-06, "loss": 1.3016, "step": 750 }, { "epoch": 0.05789392537773666, "grad_norm": 4.345081329345703, "learning_rate": 9.97970165038155e-06, "loss": 1.3184, "step": 751 }, { "epoch": 0.057971014492753624, "grad_norm": 4.280864238739014, "learning_rate": 9.979589115073888e-06, "loss": 1.3088, "step": 752 }, { "epoch": 0.05804810360777058, "grad_norm": 4.612633228302002, "learning_rate": 9.979476269314635e-06, "loss": 1.125, "step": 753 }, { "epoch": 0.058125192722787544, "grad_norm": 4.349769115447998, "learning_rate": 9.979363113110825e-06, "loss": 1.2171, "step": 754 }, { "epoch": 0.0582022818378045, "grad_norm": 4.421850681304932, "learning_rate": 9.979249646469516e-06, "loss": 1.239, "step": 755 }, { "epoch": 0.058279370952821465, "grad_norm": 3.841928482055664, "learning_rate": 9.979135869397776e-06, "loss": 1.1576, "step": 756 }, { "epoch": 0.05835646006783842, "grad_norm": 4.535758018493652, "learning_rate": 9.979021781902705e-06, "loss": 1.2456, "step": 757 }, { "epoch": 0.05843354918285538, "grad_norm": 4.633460998535156, "learning_rate": 9.978907383991412e-06, "loss": 1.1629, "step": 758 }, { "epoch": 0.05851063829787234, "grad_norm": 4.196053504943848, "learning_rate": 9.978792675671031e-06, "loss": 1.1678, "step": 759 }, { "epoch": 0.0585877274128893, "grad_norm": 
4.235719203948975, "learning_rate": 9.978677656948712e-06, "loss": 1.135, "step": 760 }, { "epoch": 0.05866481652790626, "grad_norm": 3.9202029705047607, "learning_rate": 9.978562327831626e-06, "loss": 1.1177, "step": 761 }, { "epoch": 0.05874190564292322, "grad_norm": 4.615420341491699, "learning_rate": 9.978446688326964e-06, "loss": 1.2012, "step": 762 }, { "epoch": 0.05881899475794018, "grad_norm": 4.768507480621338, "learning_rate": 9.978330738441935e-06, "loss": 1.3307, "step": 763 }, { "epoch": 0.05889608387295714, "grad_norm": 4.350339412689209, "learning_rate": 9.978214478183767e-06, "loss": 1.127, "step": 764 }, { "epoch": 0.058973172987974096, "grad_norm": 4.4393134117126465, "learning_rate": 9.97809790755971e-06, "loss": 1.2217, "step": 765 }, { "epoch": 0.05905026210299106, "grad_norm": 4.164086818695068, "learning_rate": 9.97798102657703e-06, "loss": 1.1628, "step": 766 }, { "epoch": 0.059127351218008016, "grad_norm": 4.689718723297119, "learning_rate": 9.977863835243016e-06, "loss": 1.198, "step": 767 }, { "epoch": 0.05920444033302498, "grad_norm": 4.145715236663818, "learning_rate": 9.97774633356497e-06, "loss": 1.2088, "step": 768 }, { "epoch": 0.059281529448041936, "grad_norm": 4.122262477874756, "learning_rate": 9.977628521550223e-06, "loss": 1.1543, "step": 769 }, { "epoch": 0.05935861856305889, "grad_norm": 4.569461822509766, "learning_rate": 9.977510399206118e-06, "loss": 1.1734, "step": 770 }, { "epoch": 0.05943570767807586, "grad_norm": 4.216443061828613, "learning_rate": 9.977391966540017e-06, "loss": 1.2426, "step": 771 }, { "epoch": 0.05951279679309281, "grad_norm": 4.43640661239624, "learning_rate": 9.977273223559306e-06, "loss": 1.1938, "step": 772 }, { "epoch": 0.05958988590810978, "grad_norm": 3.9541783332824707, "learning_rate": 9.977154170271389e-06, "loss": 1.149, "step": 773 }, { "epoch": 0.059666975023126734, "grad_norm": 4.332737445831299, "learning_rate": 9.977034806683685e-06, "loss": 1.2612, "step": 774 }, { "epoch": 0.0597440641381437, "grad_norm": 4.155104637145996, "learning_rate": 9.976915132803638e-06, "loss": 1.0815, "step": 775 }, { "epoch": 0.059821153253160654, "grad_norm": 4.293869495391846, "learning_rate": 9.976795148638707e-06, "loss": 1.2542, "step": 776 }, { "epoch": 0.05989824236817761, "grad_norm": 4.076062202453613, "learning_rate": 9.976674854196377e-06, "loss": 1.1219, "step": 777 }, { "epoch": 0.059975331483194574, "grad_norm": 4.0625152587890625, "learning_rate": 9.976554249484144e-06, "loss": 1.1951, "step": 778 }, { "epoch": 0.06005242059821153, "grad_norm": 4.384305477142334, "learning_rate": 9.976433334509525e-06, "loss": 1.1677, "step": 779 }, { "epoch": 0.060129509713228495, "grad_norm": 4.4182448387146, "learning_rate": 9.976312109280063e-06, "loss": 1.2527, "step": 780 }, { "epoch": 0.06020659882824545, "grad_norm": 3.99910044670105, "learning_rate": 9.976190573803314e-06, "loss": 1.1124, "step": 781 }, { "epoch": 0.06028368794326241, "grad_norm": 4.698251247406006, "learning_rate": 9.976068728086857e-06, "loss": 1.2172, "step": 782 }, { "epoch": 0.06036077705827937, "grad_norm": 4.009255409240723, "learning_rate": 9.975946572138284e-06, "loss": 1.1481, "step": 783 }, { "epoch": 0.06043786617329633, "grad_norm": 4.688887596130371, "learning_rate": 9.975824105965215e-06, "loss": 1.1883, "step": 784 }, { "epoch": 0.06051495528831329, "grad_norm": 4.354454517364502, "learning_rate": 9.975701329575283e-06, "loss": 1.1541, "step": 785 }, { "epoch": 0.06059204440333025, "grad_norm": 4.520771026611328, "learning_rate": 
9.97557824297614e-06, "loss": 1.2542, "step": 786 }, { "epoch": 0.06066913351834721, "grad_norm": 4.378393650054932, "learning_rate": 9.975454846175466e-06, "loss": 1.2588, "step": 787 }, { "epoch": 0.06074622263336417, "grad_norm": 3.970188856124878, "learning_rate": 9.975331139180951e-06, "loss": 1.1532, "step": 788 }, { "epoch": 0.060823311748381126, "grad_norm": 4.00541877746582, "learning_rate": 9.975207122000305e-06, "loss": 1.2816, "step": 789 }, { "epoch": 0.06090040086339809, "grad_norm": 4.243439674377441, "learning_rate": 9.975082794641264e-06, "loss": 1.1577, "step": 790 }, { "epoch": 0.060977489978415046, "grad_norm": 4.396401405334473, "learning_rate": 9.974958157111578e-06, "loss": 1.2457, "step": 791 }, { "epoch": 0.06105457909343201, "grad_norm": 4.4037017822265625, "learning_rate": 9.974833209419016e-06, "loss": 1.2012, "step": 792 }, { "epoch": 0.06113166820844897, "grad_norm": 4.3235907554626465, "learning_rate": 9.974707951571369e-06, "loss": 1.0912, "step": 793 }, { "epoch": 0.06120875732346592, "grad_norm": 4.980129718780518, "learning_rate": 9.974582383576446e-06, "loss": 1.1828, "step": 794 }, { "epoch": 0.06128584643848289, "grad_norm": 4.494161128997803, "learning_rate": 9.974456505442073e-06, "loss": 1.3294, "step": 795 }, { "epoch": 0.061362935553499844, "grad_norm": 3.907623767852783, "learning_rate": 9.974330317176103e-06, "loss": 1.1584, "step": 796 }, { "epoch": 0.06144002466851681, "grad_norm": 4.022689342498779, "learning_rate": 9.9742038187864e-06, "loss": 1.1379, "step": 797 }, { "epoch": 0.061517113783533764, "grad_norm": 4.5095977783203125, "learning_rate": 9.974077010280851e-06, "loss": 1.2845, "step": 798 }, { "epoch": 0.06159420289855073, "grad_norm": 4.331615447998047, "learning_rate": 9.97394989166736e-06, "loss": 1.1422, "step": 799 }, { "epoch": 0.061671292013567684, "grad_norm": 4.0961527824401855, "learning_rate": 9.973822462953856e-06, "loss": 1.248, "step": 800 }, { "epoch": 0.06174838112858464, "grad_norm": 4.432220935821533, "learning_rate": 9.973694724148281e-06, "loss": 1.1686, "step": 801 }, { "epoch": 0.061825470243601605, "grad_norm": 4.118536949157715, "learning_rate": 9.973566675258598e-06, "loss": 1.1243, "step": 802 }, { "epoch": 0.06190255935861856, "grad_norm": 3.9236130714416504, "learning_rate": 9.973438316292794e-06, "loss": 1.1906, "step": 803 }, { "epoch": 0.061979648473635525, "grad_norm": 4.201266288757324, "learning_rate": 9.97330964725887e-06, "loss": 1.109, "step": 804 }, { "epoch": 0.06205673758865248, "grad_norm": 4.015562534332275, "learning_rate": 9.973180668164844e-06, "loss": 1.1616, "step": 805 }, { "epoch": 0.062133826703669445, "grad_norm": 5.223426818847656, "learning_rate": 9.97305137901876e-06, "loss": 1.382, "step": 806 }, { "epoch": 0.0622109158186864, "grad_norm": 4.341038227081299, "learning_rate": 9.972921779828679e-06, "loss": 1.1932, "step": 807 }, { "epoch": 0.06228800493370336, "grad_norm": 4.124241352081299, "learning_rate": 9.972791870602682e-06, "loss": 1.2678, "step": 808 }, { "epoch": 0.06236509404872032, "grad_norm": 4.108554363250732, "learning_rate": 9.972661651348865e-06, "loss": 1.2885, "step": 809 }, { "epoch": 0.06244218316373728, "grad_norm": 4.48992919921875, "learning_rate": 9.972531122075349e-06, "loss": 1.2948, "step": 810 }, { "epoch": 0.06251927227875424, "grad_norm": 4.17953634262085, "learning_rate": 9.97240028279027e-06, "loss": 1.0968, "step": 811 }, { "epoch": 0.0625963613937712, "grad_norm": 4.093837738037109, "learning_rate": 9.972269133501787e-06, "loss": 1.1572, "step": 
812 }, { "epoch": 0.06267345050878816, "grad_norm": 4.524899482727051, "learning_rate": 9.972137674218077e-06, "loss": 1.2688, "step": 813 }, { "epoch": 0.06275053962380511, "grad_norm": 4.406780242919922, "learning_rate": 9.972005904947332e-06, "loss": 1.3075, "step": 814 }, { "epoch": 0.06282762873882208, "grad_norm": 4.174956798553467, "learning_rate": 9.97187382569777e-06, "loss": 1.1822, "step": 815 }, { "epoch": 0.06290471785383904, "grad_norm": 3.8772571086883545, "learning_rate": 9.971741436477625e-06, "loss": 1.1562, "step": 816 }, { "epoch": 0.062981806968856, "grad_norm": 3.9671881198883057, "learning_rate": 9.971608737295151e-06, "loss": 1.2294, "step": 817 }, { "epoch": 0.06305889608387295, "grad_norm": 4.8057098388671875, "learning_rate": 9.971475728158622e-06, "loss": 1.2987, "step": 818 }, { "epoch": 0.06313598519888991, "grad_norm": 4.444809913635254, "learning_rate": 9.971342409076328e-06, "loss": 1.2099, "step": 819 }, { "epoch": 0.06321307431390688, "grad_norm": 4.493791103363037, "learning_rate": 9.971208780056582e-06, "loss": 1.1324, "step": 820 }, { "epoch": 0.06329016342892384, "grad_norm": 4.225183963775635, "learning_rate": 9.971074841107715e-06, "loss": 1.1474, "step": 821 }, { "epoch": 0.0633672525439408, "grad_norm": 4.375453948974609, "learning_rate": 9.970940592238077e-06, "loss": 1.0902, "step": 822 }, { "epoch": 0.06344434165895775, "grad_norm": 4.046267509460449, "learning_rate": 9.97080603345604e-06, "loss": 1.3007, "step": 823 }, { "epoch": 0.06352143077397472, "grad_norm": 4.696292400360107, "learning_rate": 9.970671164769989e-06, "loss": 1.0967, "step": 824 }, { "epoch": 0.06359851988899168, "grad_norm": 3.9629440307617188, "learning_rate": 9.970535986188337e-06, "loss": 1.0908, "step": 825 }, { "epoch": 0.06367560900400863, "grad_norm": 4.310845851898193, "learning_rate": 9.970400497719508e-06, "loss": 1.1777, "step": 826 }, { "epoch": 0.06375269811902559, "grad_norm": 4.32405948638916, "learning_rate": 9.97026469937195e-06, "loss": 1.2285, "step": 827 }, { "epoch": 0.06382978723404255, "grad_norm": 4.7154130935668945, "learning_rate": 9.970128591154131e-06, "loss": 1.3087, "step": 828 }, { "epoch": 0.06390687634905952, "grad_norm": 4.456521034240723, "learning_rate": 9.969992173074534e-06, "loss": 1.2192, "step": 829 }, { "epoch": 0.06398396546407648, "grad_norm": 3.9812963008880615, "learning_rate": 9.969855445141666e-06, "loss": 1.1708, "step": 830 }, { "epoch": 0.06406105457909343, "grad_norm": 4.128241539001465, "learning_rate": 9.969718407364051e-06, "loss": 1.0954, "step": 831 }, { "epoch": 0.06413814369411039, "grad_norm": 4.4097089767456055, "learning_rate": 9.969581059750231e-06, "loss": 1.1696, "step": 832 }, { "epoch": 0.06421523280912735, "grad_norm": 4.542466163635254, "learning_rate": 9.96944340230877e-06, "loss": 1.3143, "step": 833 }, { "epoch": 0.06429232192414432, "grad_norm": 3.86757755279541, "learning_rate": 9.969305435048251e-06, "loss": 1.2229, "step": 834 }, { "epoch": 0.06436941103916127, "grad_norm": 4.428218841552734, "learning_rate": 9.969167157977275e-06, "loss": 1.1942, "step": 835 }, { "epoch": 0.06444650015417823, "grad_norm": 4.047502040863037, "learning_rate": 9.969028571104462e-06, "loss": 1.1897, "step": 836 }, { "epoch": 0.06452358926919519, "grad_norm": 4.053440570831299, "learning_rate": 9.968889674438453e-06, "loss": 1.1565, "step": 837 }, { "epoch": 0.06460067838421214, "grad_norm": 4.556980609893799, "learning_rate": 9.968750467987907e-06, "loss": 1.2299, "step": 838 }, { "epoch": 0.06467776749922911, 
"grad_norm": 4.244764804840088, "learning_rate": 9.968610951761504e-06, "loss": 1.2834, "step": 839 }, { "epoch": 0.06475485661424607, "grad_norm": 4.483509540557861, "learning_rate": 9.968471125767942e-06, "loss": 1.2776, "step": 840 }, { "epoch": 0.06483194572926303, "grad_norm": 4.05586051940918, "learning_rate": 9.968330990015935e-06, "loss": 1.3225, "step": 841 }, { "epoch": 0.06490903484427998, "grad_norm": 4.234732627868652, "learning_rate": 9.968190544514225e-06, "loss": 1.3191, "step": 842 }, { "epoch": 0.06498612395929695, "grad_norm": 3.936809778213501, "learning_rate": 9.968049789271564e-06, "loss": 1.1848, "step": 843 }, { "epoch": 0.06506321307431391, "grad_norm": 4.090641498565674, "learning_rate": 9.96790872429673e-06, "loss": 1.1973, "step": 844 }, { "epoch": 0.06514030218933087, "grad_norm": 4.085000038146973, "learning_rate": 9.967767349598517e-06, "loss": 1.1307, "step": 845 }, { "epoch": 0.06521739130434782, "grad_norm": 4.048746585845947, "learning_rate": 9.967625665185737e-06, "loss": 1.2981, "step": 846 }, { "epoch": 0.06529448041936478, "grad_norm": 4.098531246185303, "learning_rate": 9.967483671067224e-06, "loss": 1.2129, "step": 847 }, { "epoch": 0.06537156953438175, "grad_norm": 4.320443630218506, "learning_rate": 9.967341367251833e-06, "loss": 1.1298, "step": 848 }, { "epoch": 0.06544865864939871, "grad_norm": 4.260039806365967, "learning_rate": 9.967198753748432e-06, "loss": 1.2355, "step": 849 }, { "epoch": 0.06552574776441566, "grad_norm": 4.163205623626709, "learning_rate": 9.967055830565917e-06, "loss": 1.2541, "step": 850 }, { "epoch": 0.06560283687943262, "grad_norm": 3.9784324169158936, "learning_rate": 9.966912597713196e-06, "loss": 1.1646, "step": 851 }, { "epoch": 0.06567992599444958, "grad_norm": 4.5116682052612305, "learning_rate": 9.966769055199197e-06, "loss": 1.2124, "step": 852 }, { "epoch": 0.06575701510946655, "grad_norm": 3.7753348350524902, "learning_rate": 9.966625203032871e-06, "loss": 1.1988, "step": 853 }, { "epoch": 0.0658341042244835, "grad_norm": 4.4301323890686035, "learning_rate": 9.966481041223188e-06, "loss": 1.146, "step": 854 }, { "epoch": 0.06591119333950046, "grad_norm": 4.574663162231445, "learning_rate": 9.966336569779133e-06, "loss": 1.1577, "step": 855 }, { "epoch": 0.06598828245451742, "grad_norm": 3.872276544570923, "learning_rate": 9.966191788709716e-06, "loss": 1.0697, "step": 856 }, { "epoch": 0.06606537156953438, "grad_norm": 3.989187240600586, "learning_rate": 9.96604669802396e-06, "loss": 1.1815, "step": 857 }, { "epoch": 0.06614246068455135, "grad_norm": 4.288700580596924, "learning_rate": 9.965901297730914e-06, "loss": 1.0518, "step": 858 }, { "epoch": 0.0662195497995683, "grad_norm": 4.061006546020508, "learning_rate": 9.965755587839638e-06, "loss": 1.2016, "step": 859 }, { "epoch": 0.06629663891458526, "grad_norm": 4.13923454284668, "learning_rate": 9.96560956835922e-06, "loss": 1.1651, "step": 860 }, { "epoch": 0.06637372802960222, "grad_norm": 4.172252655029297, "learning_rate": 9.965463239298764e-06, "loss": 1.1611, "step": 861 }, { "epoch": 0.06645081714461917, "grad_norm": 4.616154670715332, "learning_rate": 9.965316600667394e-06, "loss": 1.2395, "step": 862 }, { "epoch": 0.06652790625963614, "grad_norm": 4.157519340515137, "learning_rate": 9.965169652474247e-06, "loss": 1.271, "step": 863 }, { "epoch": 0.0666049953746531, "grad_norm": 4.29591178894043, "learning_rate": 9.965022394728487e-06, "loss": 1.252, "step": 864 }, { "epoch": 0.06668208448967006, "grad_norm": 4.058745861053467, "learning_rate": 
9.964874827439297e-06, "loss": 1.2369, "step": 865 }, { "epoch": 0.06675917360468701, "grad_norm": 4.279184818267822, "learning_rate": 9.964726950615875e-06, "loss": 1.3584, "step": 866 }, { "epoch": 0.06683626271970398, "grad_norm": 4.2356672286987305, "learning_rate": 9.964578764267441e-06, "loss": 1.1432, "step": 867 }, { "epoch": 0.06691335183472094, "grad_norm": 5.137538909912109, "learning_rate": 9.96443026840323e-06, "loss": 1.1755, "step": 868 }, { "epoch": 0.0669904409497379, "grad_norm": 4.083669185638428, "learning_rate": 9.964281463032507e-06, "loss": 1.285, "step": 869 }, { "epoch": 0.06706753006475485, "grad_norm": 4.169551372528076, "learning_rate": 9.964132348164544e-06, "loss": 1.1206, "step": 870 }, { "epoch": 0.06714461917977181, "grad_norm": 4.000573635101318, "learning_rate": 9.96398292380864e-06, "loss": 1.1801, "step": 871 }, { "epoch": 0.06722170829478878, "grad_norm": 4.286387920379639, "learning_rate": 9.96383318997411e-06, "loss": 1.0842, "step": 872 }, { "epoch": 0.06729879740980574, "grad_norm": 3.734882354736328, "learning_rate": 9.963683146670286e-06, "loss": 1.1005, "step": 873 }, { "epoch": 0.0673758865248227, "grad_norm": 4.232124328613281, "learning_rate": 9.963532793906529e-06, "loss": 1.2437, "step": 874 }, { "epoch": 0.06745297563983965, "grad_norm": 4.824527740478516, "learning_rate": 9.963382131692208e-06, "loss": 1.2967, "step": 875 }, { "epoch": 0.06753006475485661, "grad_norm": 4.362327575683594, "learning_rate": 9.963231160036716e-06, "loss": 1.23, "step": 876 }, { "epoch": 0.06760715386987358, "grad_norm": 4.00999641418457, "learning_rate": 9.963079878949466e-06, "loss": 1.2246, "step": 877 }, { "epoch": 0.06768424298489054, "grad_norm": 4.2067131996154785, "learning_rate": 9.962928288439891e-06, "loss": 1.2255, "step": 878 }, { "epoch": 0.06776133209990749, "grad_norm": 4.093700408935547, "learning_rate": 9.962776388517441e-06, "loss": 1.199, "step": 879 }, { "epoch": 0.06783842121492445, "grad_norm": 4.058950424194336, "learning_rate": 9.962624179191586e-06, "loss": 1.1883, "step": 880 }, { "epoch": 0.0679155103299414, "grad_norm": 4.307193756103516, "learning_rate": 9.962471660471815e-06, "loss": 1.1255, "step": 881 }, { "epoch": 0.06799259944495838, "grad_norm": 3.914050579071045, "learning_rate": 9.96231883236764e-06, "loss": 1.1739, "step": 882 }, { "epoch": 0.06806968855997533, "grad_norm": 3.9826557636260986, "learning_rate": 9.962165694888583e-06, "loss": 1.1386, "step": 883 }, { "epoch": 0.06814677767499229, "grad_norm": 3.8433432579040527, "learning_rate": 9.962012248044195e-06, "loss": 1.0846, "step": 884 }, { "epoch": 0.06822386679000925, "grad_norm": 3.975701093673706, "learning_rate": 9.961858491844044e-06, "loss": 1.1565, "step": 885 }, { "epoch": 0.06830095590502622, "grad_norm": 4.125971794128418, "learning_rate": 9.961704426297712e-06, "loss": 1.1605, "step": 886 }, { "epoch": 0.06837804502004317, "grad_norm": 4.062132358551025, "learning_rate": 9.961550051414808e-06, "loss": 1.2098, "step": 887 }, { "epoch": 0.06845513413506013, "grad_norm": 4.446691036224365, "learning_rate": 9.961395367204953e-06, "loss": 1.1216, "step": 888 }, { "epoch": 0.06853222325007709, "grad_norm": 4.357199668884277, "learning_rate": 9.961240373677793e-06, "loss": 1.0838, "step": 889 }, { "epoch": 0.06860931236509404, "grad_norm": 4.313502311706543, "learning_rate": 9.961085070842992e-06, "loss": 1.1937, "step": 890 }, { "epoch": 0.06868640148011101, "grad_norm": 3.93827223777771, "learning_rate": 9.960929458710231e-06, "loss": 1.1371, "step": 891 }, 
{ "epoch": 0.06876349059512797, "grad_norm": 3.7555925846099854, "learning_rate": 9.960773537289213e-06, "loss": 1.1783, "step": 892 }, { "epoch": 0.06884057971014493, "grad_norm": 4.327067852020264, "learning_rate": 9.960617306589654e-06, "loss": 1.2371, "step": 893 }, { "epoch": 0.06891766882516188, "grad_norm": 3.748533248901367, "learning_rate": 9.960460766621299e-06, "loss": 1.1646, "step": 894 }, { "epoch": 0.06899475794017884, "grad_norm": 4.586325168609619, "learning_rate": 9.960303917393906e-06, "loss": 1.2139, "step": 895 }, { "epoch": 0.06907184705519581, "grad_norm": 4.419130802154541, "learning_rate": 9.960146758917254e-06, "loss": 1.4065, "step": 896 }, { "epoch": 0.06914893617021277, "grad_norm": 4.031462669372559, "learning_rate": 9.959989291201141e-06, "loss": 1.2925, "step": 897 }, { "epoch": 0.06922602528522973, "grad_norm": 4.59142541885376, "learning_rate": 9.959831514255383e-06, "loss": 1.2006, "step": 898 }, { "epoch": 0.06930311440024668, "grad_norm": 3.835858106613159, "learning_rate": 9.95967342808982e-06, "loss": 1.2036, "step": 899 }, { "epoch": 0.06938020351526364, "grad_norm": 4.229325771331787, "learning_rate": 9.959515032714304e-06, "loss": 1.1446, "step": 900 }, { "epoch": 0.06945729263028061, "grad_norm": 4.047120094299316, "learning_rate": 9.959356328138712e-06, "loss": 1.1789, "step": 901 }, { "epoch": 0.06953438174529757, "grad_norm": 3.6105780601501465, "learning_rate": 9.959197314372937e-06, "loss": 1.192, "step": 902 }, { "epoch": 0.06961147086031452, "grad_norm": 4.577813625335693, "learning_rate": 9.959037991426896e-06, "loss": 1.2333, "step": 903 }, { "epoch": 0.06968855997533148, "grad_norm": 3.890207052230835, "learning_rate": 9.958878359310518e-06, "loss": 1.1167, "step": 904 }, { "epoch": 0.06976564909034844, "grad_norm": 4.432231426239014, "learning_rate": 9.958718418033757e-06, "loss": 1.196, "step": 905 }, { "epoch": 0.0698427382053654, "grad_norm": 3.846874475479126, "learning_rate": 9.958558167606585e-06, "loss": 1.1761, "step": 906 }, { "epoch": 0.06991982732038236, "grad_norm": 4.190489292144775, "learning_rate": 9.958397608038994e-06, "loss": 1.129, "step": 907 }, { "epoch": 0.06999691643539932, "grad_norm": 3.783397912979126, "learning_rate": 9.95823673934099e-06, "loss": 1.2228, "step": 908 }, { "epoch": 0.07007400555041628, "grad_norm": 3.776473045349121, "learning_rate": 9.958075561522605e-06, "loss": 1.1348, "step": 909 }, { "epoch": 0.07015109466543325, "grad_norm": 4.117181777954102, "learning_rate": 9.957914074593889e-06, "loss": 1.1504, "step": 910 }, { "epoch": 0.0702281837804502, "grad_norm": 4.3804545402526855, "learning_rate": 9.957752278564905e-06, "loss": 1.2356, "step": 911 }, { "epoch": 0.07030527289546716, "grad_norm": 4.283797264099121, "learning_rate": 9.957590173445746e-06, "loss": 1.2674, "step": 912 }, { "epoch": 0.07038236201048412, "grad_norm": 4.3605852127075195, "learning_rate": 9.957427759246515e-06, "loss": 1.2091, "step": 913 }, { "epoch": 0.07045945112550107, "grad_norm": 4.470867156982422, "learning_rate": 9.957265035977338e-06, "loss": 1.2004, "step": 914 }, { "epoch": 0.07053654024051804, "grad_norm": 4.192192077636719, "learning_rate": 9.95710200364836e-06, "loss": 1.2206, "step": 915 }, { "epoch": 0.070613629355535, "grad_norm": 4.35572624206543, "learning_rate": 9.956938662269745e-06, "loss": 1.2407, "step": 916 }, { "epoch": 0.07069071847055196, "grad_norm": 3.7318854331970215, "learning_rate": 9.956775011851679e-06, "loss": 1.1075, "step": 917 }, { "epoch": 0.07076780758556891, "grad_norm": 
4.309021949768066, "learning_rate": 9.956611052404362e-06, "loss": 1.1874, "step": 918 }, { "epoch": 0.07084489670058587, "grad_norm": 4.061330318450928, "learning_rate": 9.956446783938016e-06, "loss": 1.2302, "step": 919 }, { "epoch": 0.07092198581560284, "grad_norm": 4.194582462310791, "learning_rate": 9.956282206462886e-06, "loss": 1.2344, "step": 920 }, { "epoch": 0.0709990749306198, "grad_norm": 3.940514326095581, "learning_rate": 9.956117319989226e-06, "loss": 1.1443, "step": 921 }, { "epoch": 0.07107616404563676, "grad_norm": 4.2155046463012695, "learning_rate": 9.95595212452732e-06, "loss": 1.1655, "step": 922 }, { "epoch": 0.07115325316065371, "grad_norm": 4.344760417938232, "learning_rate": 9.95578662008747e-06, "loss": 1.3106, "step": 923 }, { "epoch": 0.07123034227567067, "grad_norm": 3.8140082359313965, "learning_rate": 9.955620806679987e-06, "loss": 1.188, "step": 924 }, { "epoch": 0.07130743139068764, "grad_norm": 3.9605846405029297, "learning_rate": 9.955454684315215e-06, "loss": 1.21, "step": 925 }, { "epoch": 0.0713845205057046, "grad_norm": 4.253360271453857, "learning_rate": 9.955288253003508e-06, "loss": 1.283, "step": 926 }, { "epoch": 0.07146160962072155, "grad_norm": 4.852066516876221, "learning_rate": 9.955121512755242e-06, "loss": 1.2646, "step": 927 }, { "epoch": 0.07153869873573851, "grad_norm": 4.225571155548096, "learning_rate": 9.954954463580813e-06, "loss": 1.1779, "step": 928 }, { "epoch": 0.07161578785075548, "grad_norm": 4.433430194854736, "learning_rate": 9.954787105490635e-06, "loss": 1.3087, "step": 929 }, { "epoch": 0.07169287696577244, "grad_norm": 4.311334609985352, "learning_rate": 9.954619438495142e-06, "loss": 1.1685, "step": 930 }, { "epoch": 0.0717699660807894, "grad_norm": 4.312464237213135, "learning_rate": 9.954451462604788e-06, "loss": 1.2989, "step": 931 }, { "epoch": 0.07184705519580635, "grad_norm": 4.1521477699279785, "learning_rate": 9.954283177830047e-06, "loss": 1.2642, "step": 932 }, { "epoch": 0.07192414431082331, "grad_norm": 4.0158820152282715, "learning_rate": 9.954114584181407e-06, "loss": 1.1011, "step": 933 }, { "epoch": 0.07200123342584028, "grad_norm": 3.9613866806030273, "learning_rate": 9.953945681669381e-06, "loss": 1.174, "step": 934 }, { "epoch": 0.07207832254085723, "grad_norm": 4.178623199462891, "learning_rate": 9.953776470304499e-06, "loss": 1.174, "step": 935 }, { "epoch": 0.07215541165587419, "grad_norm": 4.1417131423950195, "learning_rate": 9.95360695009731e-06, "loss": 1.2549, "step": 936 }, { "epoch": 0.07223250077089115, "grad_norm": 4.326009750366211, "learning_rate": 9.953437121058384e-06, "loss": 1.1613, "step": 937 }, { "epoch": 0.0723095898859081, "grad_norm": 3.9384677410125732, "learning_rate": 9.953266983198307e-06, "loss": 1.1953, "step": 938 }, { "epoch": 0.07238667900092507, "grad_norm": 4.292700290679932, "learning_rate": 9.953096536527688e-06, "loss": 1.2971, "step": 939 }, { "epoch": 0.07246376811594203, "grad_norm": 3.9370415210723877, "learning_rate": 9.952925781057152e-06, "loss": 1.2132, "step": 940 }, { "epoch": 0.07254085723095899, "grad_norm": 4.184019088745117, "learning_rate": 9.952754716797345e-06, "loss": 1.1628, "step": 941 }, { "epoch": 0.07261794634597594, "grad_norm": 3.967825412750244, "learning_rate": 9.952583343758934e-06, "loss": 1.242, "step": 942 }, { "epoch": 0.0726950354609929, "grad_norm": 4.628615379333496, "learning_rate": 9.9524116619526e-06, "loss": 1.2041, "step": 943 }, { "epoch": 0.07277212457600987, "grad_norm": 3.8717031478881836, "learning_rate": 
9.952239671389049e-06, "loss": 1.0256, "step": 944 }, { "epoch": 0.07284921369102683, "grad_norm": 3.9724347591400146, "learning_rate": 9.952067372079003e-06, "loss": 1.1419, "step": 945 }, { "epoch": 0.07292630280604379, "grad_norm": 4.037529468536377, "learning_rate": 9.951894764033202e-06, "loss": 1.2491, "step": 946 }, { "epoch": 0.07300339192106074, "grad_norm": 4.202883720397949, "learning_rate": 9.951721847262413e-06, "loss": 1.2303, "step": 947 }, { "epoch": 0.0730804810360777, "grad_norm": 4.007006645202637, "learning_rate": 9.951548621777409e-06, "loss": 1.1551, "step": 948 }, { "epoch": 0.07315757015109467, "grad_norm": 4.146481513977051, "learning_rate": 9.951375087588993e-06, "loss": 1.2194, "step": 949 }, { "epoch": 0.07323465926611163, "grad_norm": 4.004715919494629, "learning_rate": 9.951201244707986e-06, "loss": 1.1161, "step": 950 }, { "epoch": 0.07331174838112858, "grad_norm": 4.251381874084473, "learning_rate": 9.951027093145222e-06, "loss": 1.2399, "step": 951 }, { "epoch": 0.07338883749614554, "grad_norm": 4.143471717834473, "learning_rate": 9.950852632911563e-06, "loss": 1.1627, "step": 952 }, { "epoch": 0.07346592661116251, "grad_norm": 3.974905252456665, "learning_rate": 9.950677864017882e-06, "loss": 1.1123, "step": 953 }, { "epoch": 0.07354301572617947, "grad_norm": 4.213186740875244, "learning_rate": 9.950502786475078e-06, "loss": 1.224, "step": 954 }, { "epoch": 0.07362010484119642, "grad_norm": 4.501376628875732, "learning_rate": 9.950327400294063e-06, "loss": 1.254, "step": 955 }, { "epoch": 0.07369719395621338, "grad_norm": 3.8753960132598877, "learning_rate": 9.950151705485774e-06, "loss": 1.1435, "step": 956 }, { "epoch": 0.07377428307123034, "grad_norm": 4.606151580810547, "learning_rate": 9.949975702061162e-06, "loss": 1.2086, "step": 957 }, { "epoch": 0.07385137218624731, "grad_norm": 3.9693284034729004, "learning_rate": 9.949799390031203e-06, "loss": 1.2506, "step": 958 }, { "epoch": 0.07392846130126426, "grad_norm": 4.064651966094971, "learning_rate": 9.949622769406888e-06, "loss": 1.1616, "step": 959 }, { "epoch": 0.07400555041628122, "grad_norm": 4.274019241333008, "learning_rate": 9.949445840199227e-06, "loss": 1.2061, "step": 960 }, { "epoch": 0.07408263953129818, "grad_norm": 4.298183917999268, "learning_rate": 9.949268602419253e-06, "loss": 1.2899, "step": 961 }, { "epoch": 0.07415972864631513, "grad_norm": 4.174452781677246, "learning_rate": 9.949091056078012e-06, "loss": 1.2072, "step": 962 }, { "epoch": 0.0742368177613321, "grad_norm": 4.208117485046387, "learning_rate": 9.948913201186579e-06, "loss": 1.2707, "step": 963 }, { "epoch": 0.07431390687634906, "grad_norm": 4.637174606323242, "learning_rate": 9.948735037756037e-06, "loss": 1.2792, "step": 964 }, { "epoch": 0.07439099599136602, "grad_norm": 3.6787731647491455, "learning_rate": 9.948556565797497e-06, "loss": 1.1813, "step": 965 }, { "epoch": 0.07446808510638298, "grad_norm": 4.200822830200195, "learning_rate": 9.948377785322082e-06, "loss": 1.2527, "step": 966 }, { "epoch": 0.07454517422139993, "grad_norm": 3.7479918003082275, "learning_rate": 9.948198696340943e-06, "loss": 1.0949, "step": 967 }, { "epoch": 0.0746222633364169, "grad_norm": 3.8182127475738525, "learning_rate": 9.94801929886524e-06, "loss": 1.1945, "step": 968 }, { "epoch": 0.07469935245143386, "grad_norm": 4.381171703338623, "learning_rate": 9.947839592906163e-06, "loss": 1.302, "step": 969 }, { "epoch": 0.07477644156645082, "grad_norm": 4.537683963775635, "learning_rate": 9.947659578474911e-06, "loss": 1.3543, 
"step": 970 }, { "epoch": 0.07485353068146777, "grad_norm": 4.541914939880371, "learning_rate": 9.94747925558271e-06, "loss": 1.213, "step": 971 }, { "epoch": 0.07493061979648474, "grad_norm": 4.564115524291992, "learning_rate": 9.9472986242408e-06, "loss": 1.2574, "step": 972 }, { "epoch": 0.0750077089115017, "grad_norm": 4.06321907043457, "learning_rate": 9.947117684460443e-06, "loss": 1.227, "step": 973 }, { "epoch": 0.07508479802651866, "grad_norm": 4.070244312286377, "learning_rate": 9.946936436252923e-06, "loss": 1.2062, "step": 974 }, { "epoch": 0.07516188714153561, "grad_norm": 3.8233132362365723, "learning_rate": 9.946754879629535e-06, "loss": 1.1082, "step": 975 }, { "epoch": 0.07523897625655257, "grad_norm": 4.254786014556885, "learning_rate": 9.9465730146016e-06, "loss": 1.2617, "step": 976 }, { "epoch": 0.07531606537156954, "grad_norm": 5.448151111602783, "learning_rate": 9.946390841180457e-06, "loss": 1.3154, "step": 977 }, { "epoch": 0.0753931544865865, "grad_norm": 3.762033462524414, "learning_rate": 9.946208359377463e-06, "loss": 1.1022, "step": 978 }, { "epoch": 0.07547024360160345, "grad_norm": 4.151965618133545, "learning_rate": 9.946025569203994e-06, "loss": 1.1938, "step": 979 }, { "epoch": 0.07554733271662041, "grad_norm": 3.941340684890747, "learning_rate": 9.945842470671447e-06, "loss": 1.2629, "step": 980 }, { "epoch": 0.07562442183163737, "grad_norm": 3.871173620223999, "learning_rate": 9.945659063791239e-06, "loss": 1.0924, "step": 981 }, { "epoch": 0.07570151094665434, "grad_norm": 3.7768266201019287, "learning_rate": 9.9454753485748e-06, "loss": 1.0993, "step": 982 }, { "epoch": 0.0757786000616713, "grad_norm": 3.877859354019165, "learning_rate": 9.945291325033587e-06, "loss": 1.1623, "step": 983 }, { "epoch": 0.07585568917668825, "grad_norm": 3.9639294147491455, "learning_rate": 9.945106993179074e-06, "loss": 1.0856, "step": 984 }, { "epoch": 0.07593277829170521, "grad_norm": 4.09315824508667, "learning_rate": 9.94492235302275e-06, "loss": 1.2126, "step": 985 }, { "epoch": 0.07600986740672216, "grad_norm": 4.234272480010986, "learning_rate": 9.944737404576129e-06, "loss": 1.1984, "step": 986 }, { "epoch": 0.07608695652173914, "grad_norm": 3.7921226024627686, "learning_rate": 9.94455214785074e-06, "loss": 1.158, "step": 987 }, { "epoch": 0.07616404563675609, "grad_norm": 4.407901763916016, "learning_rate": 9.944366582858131e-06, "loss": 1.1847, "step": 988 }, { "epoch": 0.07624113475177305, "grad_norm": 4.234422206878662, "learning_rate": 9.944180709609874e-06, "loss": 1.1911, "step": 989 }, { "epoch": 0.07631822386679, "grad_norm": 3.9739015102386475, "learning_rate": 9.943994528117557e-06, "loss": 1.1974, "step": 990 }, { "epoch": 0.07639531298180696, "grad_norm": 4.158312797546387, "learning_rate": 9.943808038392786e-06, "loss": 1.1854, "step": 991 }, { "epoch": 0.07647240209682393, "grad_norm": 3.7783594131469727, "learning_rate": 9.943621240447188e-06, "loss": 1.1394, "step": 992 }, { "epoch": 0.07654949121184089, "grad_norm": 4.1893157958984375, "learning_rate": 9.943434134292412e-06, "loss": 1.2535, "step": 993 }, { "epoch": 0.07662658032685785, "grad_norm": 4.0416388511657715, "learning_rate": 9.943246719940118e-06, "loss": 1.14, "step": 994 }, { "epoch": 0.0767036694418748, "grad_norm": 5.405656337738037, "learning_rate": 9.943058997401993e-06, "loss": 1.2001, "step": 995 }, { "epoch": 0.07678075855689177, "grad_norm": 3.7127041816711426, "learning_rate": 9.942870966689742e-06, "loss": 1.1386, "step": 996 }, { "epoch": 0.07685784767190873, 
"grad_norm": 4.761865615844727, "learning_rate": 9.942682627815084e-06, "loss": 1.1629, "step": 997 }, { "epoch": 0.07693493678692569, "grad_norm": 4.404749393463135, "learning_rate": 9.942493980789762e-06, "loss": 1.1531, "step": 998 }, { "epoch": 0.07701202590194264, "grad_norm": 4.3769402503967285, "learning_rate": 9.94230502562554e-06, "loss": 1.2155, "step": 999 }, { "epoch": 0.0770891150169596, "grad_norm": 4.384872913360596, "learning_rate": 9.942115762334196e-06, "loss": 1.287, "step": 1000 }, { "epoch": 0.07716620413197657, "grad_norm": 3.899238109588623, "learning_rate": 9.941926190927532e-06, "loss": 1.1246, "step": 1001 }, { "epoch": 0.07724329324699353, "grad_norm": 3.7073872089385986, "learning_rate": 9.941736311417362e-06, "loss": 1.2122, "step": 1002 }, { "epoch": 0.07732038236201048, "grad_norm": 3.755033254623413, "learning_rate": 9.94154612381553e-06, "loss": 1.1603, "step": 1003 }, { "epoch": 0.07739747147702744, "grad_norm": 4.051525115966797, "learning_rate": 9.941355628133887e-06, "loss": 1.121, "step": 1004 }, { "epoch": 0.0774745605920444, "grad_norm": 3.882408380508423, "learning_rate": 9.941164824384313e-06, "loss": 1.2453, "step": 1005 }, { "epoch": 0.07755164970706137, "grad_norm": 4.029034614562988, "learning_rate": 9.940973712578706e-06, "loss": 1.1899, "step": 1006 }, { "epoch": 0.07762873882207832, "grad_norm": 4.212346076965332, "learning_rate": 9.940782292728975e-06, "loss": 1.2007, "step": 1007 }, { "epoch": 0.07770582793709528, "grad_norm": 4.014603137969971, "learning_rate": 9.940590564847059e-06, "loss": 1.2438, "step": 1008 }, { "epoch": 0.07778291705211224, "grad_norm": 4.43209171295166, "learning_rate": 9.940398528944906e-06, "loss": 1.2154, "step": 1009 }, { "epoch": 0.0778600061671292, "grad_norm": 4.110109329223633, "learning_rate": 9.940206185034496e-06, "loss": 1.0843, "step": 1010 }, { "epoch": 0.07793709528214617, "grad_norm": 4.09305477142334, "learning_rate": 9.940013533127813e-06, "loss": 1.1252, "step": 1011 }, { "epoch": 0.07801418439716312, "grad_norm": 3.9109747409820557, "learning_rate": 9.939820573236873e-06, "loss": 1.3243, "step": 1012 }, { "epoch": 0.07809127351218008, "grad_norm": 4.246638774871826, "learning_rate": 9.939627305373703e-06, "loss": 1.0503, "step": 1013 }, { "epoch": 0.07816836262719704, "grad_norm": 4.346315383911133, "learning_rate": 9.939433729550354e-06, "loss": 1.2939, "step": 1014 }, { "epoch": 0.078245451742214, "grad_norm": 4.1435322761535645, "learning_rate": 9.939239845778894e-06, "loss": 1.2417, "step": 1015 }, { "epoch": 0.07832254085723096, "grad_norm": 4.223635673522949, "learning_rate": 9.93904565407141e-06, "loss": 1.2014, "step": 1016 }, { "epoch": 0.07839962997224792, "grad_norm": 3.9585742950439453, "learning_rate": 9.938851154440012e-06, "loss": 1.0748, "step": 1017 }, { "epoch": 0.07847671908726488, "grad_norm": 4.183574199676514, "learning_rate": 9.93865634689682e-06, "loss": 1.1727, "step": 1018 }, { "epoch": 0.07855380820228183, "grad_norm": 3.769174098968506, "learning_rate": 9.938461231453985e-06, "loss": 1.2277, "step": 1019 }, { "epoch": 0.0786308973172988, "grad_norm": 3.8185031414031982, "learning_rate": 9.938265808123667e-06, "loss": 1.1434, "step": 1020 }, { "epoch": 0.07870798643231576, "grad_norm": 4.209797382354736, "learning_rate": 9.938070076918056e-06, "loss": 1.1186, "step": 1021 }, { "epoch": 0.07878507554733272, "grad_norm": 4.088057518005371, "learning_rate": 9.937874037849346e-06, "loss": 1.2485, "step": 1022 }, { "epoch": 0.07886216466234967, "grad_norm": 
4.1362504959106445, "learning_rate": 9.937677690929766e-06, "loss": 1.1779, "step": 1023 }, { "epoch": 0.07893925377736663, "grad_norm": 4.136228084564209, "learning_rate": 9.937481036171555e-06, "loss": 1.1269, "step": 1024 }, { "epoch": 0.0790163428923836, "grad_norm": 4.334633827209473, "learning_rate": 9.937284073586972e-06, "loss": 1.191, "step": 1025 }, { "epoch": 0.07909343200740056, "grad_norm": 4.252748012542725, "learning_rate": 9.9370868031883e-06, "loss": 1.2082, "step": 1026 }, { "epoch": 0.07917052112241751, "grad_norm": 4.121849060058594, "learning_rate": 9.936889224987834e-06, "loss": 1.2799, "step": 1027 }, { "epoch": 0.07924761023743447, "grad_norm": 4.112194061279297, "learning_rate": 9.936691338997894e-06, "loss": 1.2097, "step": 1028 }, { "epoch": 0.07932469935245143, "grad_norm": 3.915144920349121, "learning_rate": 9.936493145230817e-06, "loss": 1.0446, "step": 1029 }, { "epoch": 0.0794017884674684, "grad_norm": 4.162792682647705, "learning_rate": 9.936294643698958e-06, "loss": 1.1855, "step": 1030 }, { "epoch": 0.07947887758248535, "grad_norm": 4.035287380218506, "learning_rate": 9.936095834414693e-06, "loss": 1.2207, "step": 1031 }, { "epoch": 0.07955596669750231, "grad_norm": 4.192042827606201, "learning_rate": 9.935896717390421e-06, "loss": 1.2242, "step": 1032 }, { "epoch": 0.07963305581251927, "grad_norm": 4.014597415924072, "learning_rate": 9.93569729263855e-06, "loss": 1.1331, "step": 1033 }, { "epoch": 0.07971014492753623, "grad_norm": 3.836013078689575, "learning_rate": 9.935497560171516e-06, "loss": 1.1603, "step": 1034 }, { "epoch": 0.0797872340425532, "grad_norm": 4.218697547912598, "learning_rate": 9.93529752000177e-06, "loss": 1.2531, "step": 1035 }, { "epoch": 0.07986432315757015, "grad_norm": 3.811758518218994, "learning_rate": 9.935097172141785e-06, "loss": 1.1419, "step": 1036 }, { "epoch": 0.07994141227258711, "grad_norm": 4.130055904388428, "learning_rate": 9.93489651660405e-06, "loss": 1.1776, "step": 1037 }, { "epoch": 0.08001850138760407, "grad_norm": 4.011743068695068, "learning_rate": 9.934695553401076e-06, "loss": 1.2096, "step": 1038 }, { "epoch": 0.08009559050262104, "grad_norm": 4.1407928466796875, "learning_rate": 9.934494282545393e-06, "loss": 1.1341, "step": 1039 }, { "epoch": 0.08017267961763799, "grad_norm": 3.8697617053985596, "learning_rate": 9.934292704049546e-06, "loss": 1.1262, "step": 1040 }, { "epoch": 0.08024976873265495, "grad_norm": 5.077935695648193, "learning_rate": 9.934090817926105e-06, "loss": 1.1413, "step": 1041 }, { "epoch": 0.0803268578476719, "grad_norm": 4.142470359802246, "learning_rate": 9.933888624187656e-06, "loss": 1.3075, "step": 1042 }, { "epoch": 0.08040394696268886, "grad_norm": 4.138065814971924, "learning_rate": 9.933686122846804e-06, "loss": 1.1422, "step": 1043 }, { "epoch": 0.08048103607770583, "grad_norm": 4.101687908172607, "learning_rate": 9.933483313916174e-06, "loss": 1.2461, "step": 1044 }, { "epoch": 0.08055812519272279, "grad_norm": 4.085910320281982, "learning_rate": 9.933280197408413e-06, "loss": 1.11, "step": 1045 }, { "epoch": 0.08063521430773975, "grad_norm": 3.8039958477020264, "learning_rate": 9.933076773336179e-06, "loss": 1.1203, "step": 1046 }, { "epoch": 0.0807123034227567, "grad_norm": 4.0165886878967285, "learning_rate": 9.932873041712158e-06, "loss": 1.1276, "step": 1047 }, { "epoch": 0.08078939253777366, "grad_norm": 3.982520818710327, "learning_rate": 9.932669002549052e-06, "loss": 1.1715, "step": 1048 }, { "epoch": 0.08086648165279063, "grad_norm": 4.542549133300781, 
"learning_rate": 9.93246465585958e-06, "loss": 1.2555, "step": 1049 }, { "epoch": 0.08094357076780759, "grad_norm": 3.6177902221679688, "learning_rate": 9.932260001656482e-06, "loss": 0.995, "step": 1050 }, { "epoch": 0.08102065988282454, "grad_norm": 4.135311603546143, "learning_rate": 9.932055039952518e-06, "loss": 1.2257, "step": 1051 }, { "epoch": 0.0810977489978415, "grad_norm": 4.676593780517578, "learning_rate": 9.931849770760467e-06, "loss": 1.214, "step": 1052 }, { "epoch": 0.08117483811285846, "grad_norm": 3.9388997554779053, "learning_rate": 9.931644194093124e-06, "loss": 1.1846, "step": 1053 }, { "epoch": 0.08125192722787543, "grad_norm": 4.0811896324157715, "learning_rate": 9.931438309963308e-06, "loss": 1.1154, "step": 1054 }, { "epoch": 0.08132901634289239, "grad_norm": 4.8191118240356445, "learning_rate": 9.931232118383854e-06, "loss": 1.2899, "step": 1055 }, { "epoch": 0.08140610545790934, "grad_norm": 4.420556545257568, "learning_rate": 9.931025619367617e-06, "loss": 1.2523, "step": 1056 }, { "epoch": 0.0814831945729263, "grad_norm": 4.228475093841553, "learning_rate": 9.930818812927471e-06, "loss": 1.2077, "step": 1057 }, { "epoch": 0.08156028368794327, "grad_norm": 4.304680347442627, "learning_rate": 9.93061169907631e-06, "loss": 1.1991, "step": 1058 }, { "epoch": 0.08163737280296023, "grad_norm": 4.341451168060303, "learning_rate": 9.930404277827044e-06, "loss": 1.1833, "step": 1059 }, { "epoch": 0.08171446191797718, "grad_norm": 3.8857333660125732, "learning_rate": 9.930196549192608e-06, "loss": 1.1814, "step": 1060 }, { "epoch": 0.08179155103299414, "grad_norm": 3.722275972366333, "learning_rate": 9.929988513185952e-06, "loss": 1.199, "step": 1061 }, { "epoch": 0.0818686401480111, "grad_norm": 3.972513437271118, "learning_rate": 9.929780169820043e-06, "loss": 1.0716, "step": 1062 }, { "epoch": 0.08194572926302807, "grad_norm": 3.907376289367676, "learning_rate": 9.929571519107873e-06, "loss": 1.0642, "step": 1063 }, { "epoch": 0.08202281837804502, "grad_norm": 3.913541793823242, "learning_rate": 9.92936256106245e-06, "loss": 1.1794, "step": 1064 }, { "epoch": 0.08209990749306198, "grad_norm": 4.19666862487793, "learning_rate": 9.929153295696803e-06, "loss": 1.2815, "step": 1065 }, { "epoch": 0.08217699660807894, "grad_norm": 4.4634175300598145, "learning_rate": 9.928943723023973e-06, "loss": 1.2451, "step": 1066 }, { "epoch": 0.0822540857230959, "grad_norm": 3.9684886932373047, "learning_rate": 9.928733843057033e-06, "loss": 1.1221, "step": 1067 }, { "epoch": 0.08233117483811286, "grad_norm": 3.8272829055786133, "learning_rate": 9.928523655809062e-06, "loss": 1.1193, "step": 1068 }, { "epoch": 0.08240826395312982, "grad_norm": 4.0728325843811035, "learning_rate": 9.928313161293168e-06, "loss": 1.1795, "step": 1069 }, { "epoch": 0.08248535306814678, "grad_norm": 3.951913595199585, "learning_rate": 9.928102359522473e-06, "loss": 1.1846, "step": 1070 }, { "epoch": 0.08256244218316373, "grad_norm": 3.7784245014190674, "learning_rate": 9.927891250510118e-06, "loss": 1.1188, "step": 1071 }, { "epoch": 0.08263953129818069, "grad_norm": 4.366214752197266, "learning_rate": 9.927679834269266e-06, "loss": 1.1808, "step": 1072 }, { "epoch": 0.08271662041319766, "grad_norm": 4.112607002258301, "learning_rate": 9.927468110813099e-06, "loss": 1.1893, "step": 1073 }, { "epoch": 0.08279370952821462, "grad_norm": 3.9396910667419434, "learning_rate": 9.927256080154813e-06, "loss": 1.2093, "step": 1074 }, { "epoch": 0.08287079864323157, "grad_norm": 4.674061298370361, "learning_rate": 
9.927043742307631e-06, "loss": 1.2488, "step": 1075 }, { "epoch": 0.08294788775824853, "grad_norm": 5.152288436889648, "learning_rate": 9.926831097284788e-06, "loss": 1.2201, "step": 1076 }, { "epoch": 0.08302497687326549, "grad_norm": 3.9294207096099854, "learning_rate": 9.926618145099544e-06, "loss": 1.2437, "step": 1077 }, { "epoch": 0.08310206598828246, "grad_norm": 4.059928894042969, "learning_rate": 9.926404885765175e-06, "loss": 1.1745, "step": 1078 }, { "epoch": 0.08317915510329942, "grad_norm": 4.130738735198975, "learning_rate": 9.926191319294974e-06, "loss": 1.2372, "step": 1079 }, { "epoch": 0.08325624421831637, "grad_norm": 4.510885238647461, "learning_rate": 9.92597744570226e-06, "loss": 1.3444, "step": 1080 }, { "epoch": 0.08333333333333333, "grad_norm": 3.76959490776062, "learning_rate": 9.92576326500036e-06, "loss": 1.0194, "step": 1081 }, { "epoch": 0.0834104224483503, "grad_norm": 4.379603862762451, "learning_rate": 9.925548777202636e-06, "loss": 1.2125, "step": 1082 }, { "epoch": 0.08348751156336726, "grad_norm": 4.4609375, "learning_rate": 9.925333982322456e-06, "loss": 1.2694, "step": 1083 }, { "epoch": 0.08356460067838421, "grad_norm": 4.134787082672119, "learning_rate": 9.925118880373208e-06, "loss": 1.188, "step": 1084 }, { "epoch": 0.08364168979340117, "grad_norm": 3.97082781791687, "learning_rate": 9.924903471368308e-06, "loss": 1.0874, "step": 1085 }, { "epoch": 0.08371877890841813, "grad_norm": 3.723400592803955, "learning_rate": 9.924687755321183e-06, "loss": 1.2485, "step": 1086 }, { "epoch": 0.0837958680234351, "grad_norm": 4.063663005828857, "learning_rate": 9.92447173224528e-06, "loss": 1.1585, "step": 1087 }, { "epoch": 0.08387295713845205, "grad_norm": 4.590901851654053, "learning_rate": 9.924255402154072e-06, "loss": 1.2752, "step": 1088 }, { "epoch": 0.08395004625346901, "grad_norm": 3.818544626235962, "learning_rate": 9.924038765061042e-06, "loss": 1.0903, "step": 1089 }, { "epoch": 0.08402713536848597, "grad_norm": 4.31704044342041, "learning_rate": 9.923821820979695e-06, "loss": 1.2734, "step": 1090 }, { "epoch": 0.08410422448350292, "grad_norm": 4.051934719085693, "learning_rate": 9.923604569923562e-06, "loss": 1.304, "step": 1091 }, { "epoch": 0.0841813135985199, "grad_norm": 3.77911376953125, "learning_rate": 9.923387011906183e-06, "loss": 1.1781, "step": 1092 }, { "epoch": 0.08425840271353685, "grad_norm": 4.27567720413208, "learning_rate": 9.923169146941121e-06, "loss": 1.2181, "step": 1093 }, { "epoch": 0.08433549182855381, "grad_norm": 4.199497699737549, "learning_rate": 9.922950975041963e-06, "loss": 1.2001, "step": 1094 }, { "epoch": 0.08441258094357076, "grad_norm": 4.348501682281494, "learning_rate": 9.922732496222306e-06, "loss": 1.3706, "step": 1095 }, { "epoch": 0.08448967005858772, "grad_norm": 3.888430118560791, "learning_rate": 9.922513710495774e-06, "loss": 1.2277, "step": 1096 }, { "epoch": 0.08456675917360469, "grad_norm": 4.729867935180664, "learning_rate": 9.922294617876007e-06, "loss": 1.3851, "step": 1097 }, { "epoch": 0.08464384828862165, "grad_norm": 4.120565891265869, "learning_rate": 9.922075218376664e-06, "loss": 1.1222, "step": 1098 }, { "epoch": 0.0847209374036386, "grad_norm": 4.8957839012146, "learning_rate": 9.921855512011422e-06, "loss": 1.2947, "step": 1099 }, { "epoch": 0.08479802651865556, "grad_norm": 4.164621829986572, "learning_rate": 9.921635498793983e-06, "loss": 1.0555, "step": 1100 }, { "epoch": 0.08487511563367253, "grad_norm": 4.292223930358887, "learning_rate": 9.921415178738056e-06, "loss": 1.2633, 
"step": 1101 }, { "epoch": 0.08495220474868949, "grad_norm": 4.393791198730469, "learning_rate": 9.921194551857384e-06, "loss": 1.2356, "step": 1102 }, { "epoch": 0.08502929386370645, "grad_norm": 4.085275650024414, "learning_rate": 9.920973618165719e-06, "loss": 1.072, "step": 1103 }, { "epoch": 0.0851063829787234, "grad_norm": 4.277721881866455, "learning_rate": 9.920752377676835e-06, "loss": 1.1315, "step": 1104 }, { "epoch": 0.08518347209374036, "grad_norm": 4.160389423370361, "learning_rate": 9.920530830404525e-06, "loss": 1.2079, "step": 1105 }, { "epoch": 0.08526056120875733, "grad_norm": 4.0644917488098145, "learning_rate": 9.920308976362602e-06, "loss": 1.1534, "step": 1106 }, { "epoch": 0.08533765032377429, "grad_norm": 4.176297187805176, "learning_rate": 9.920086815564898e-06, "loss": 1.3915, "step": 1107 }, { "epoch": 0.08541473943879124, "grad_norm": 4.304412841796875, "learning_rate": 9.919864348025261e-06, "loss": 1.275, "step": 1108 }, { "epoch": 0.0854918285538082, "grad_norm": 4.072107791900635, "learning_rate": 9.919641573757563e-06, "loss": 1.1173, "step": 1109 }, { "epoch": 0.08556891766882516, "grad_norm": 4.24763822555542, "learning_rate": 9.919418492775694e-06, "loss": 1.1268, "step": 1110 }, { "epoch": 0.08564600678384213, "grad_norm": 4.156879901885986, "learning_rate": 9.91919510509356e-06, "loss": 1.1882, "step": 1111 }, { "epoch": 0.08572309589885908, "grad_norm": 4.036397933959961, "learning_rate": 9.918971410725089e-06, "loss": 1.2513, "step": 1112 }, { "epoch": 0.08580018501387604, "grad_norm": 4.336670398712158, "learning_rate": 9.918747409684225e-06, "loss": 1.0309, "step": 1113 }, { "epoch": 0.085877274128893, "grad_norm": 4.337256908416748, "learning_rate": 9.918523101984933e-06, "loss": 1.1959, "step": 1114 }, { "epoch": 0.08595436324390995, "grad_norm": 3.9590811729431152, "learning_rate": 9.918298487641202e-06, "loss": 1.15, "step": 1115 }, { "epoch": 0.08603145235892692, "grad_norm": 4.2955522537231445, "learning_rate": 9.918073566667033e-06, "loss": 1.1446, "step": 1116 }, { "epoch": 0.08610854147394388, "grad_norm": 4.317296028137207, "learning_rate": 9.917848339076448e-06, "loss": 1.2451, "step": 1117 }, { "epoch": 0.08618563058896084, "grad_norm": 4.0374755859375, "learning_rate": 9.917622804883488e-06, "loss": 1.1549, "step": 1118 }, { "epoch": 0.0862627197039778, "grad_norm": 3.970003366470337, "learning_rate": 9.917396964102218e-06, "loss": 1.123, "step": 1119 }, { "epoch": 0.08633980881899475, "grad_norm": 3.7095046043395996, "learning_rate": 9.917170816746713e-06, "loss": 1.1372, "step": 1120 }, { "epoch": 0.08641689793401172, "grad_norm": 4.35107946395874, "learning_rate": 9.916944362831073e-06, "loss": 1.2134, "step": 1121 }, { "epoch": 0.08649398704902868, "grad_norm": 4.03241491317749, "learning_rate": 9.91671760236942e-06, "loss": 1.119, "step": 1122 }, { "epoch": 0.08657107616404563, "grad_norm": 3.650089979171753, "learning_rate": 9.916490535375889e-06, "loss": 1.1689, "step": 1123 }, { "epoch": 0.08664816527906259, "grad_norm": 4.5127787590026855, "learning_rate": 9.916263161864634e-06, "loss": 1.2254, "step": 1124 }, { "epoch": 0.08672525439407956, "grad_norm": 4.502647399902344, "learning_rate": 9.916035481849834e-06, "loss": 1.3046, "step": 1125 }, { "epoch": 0.08680234350909652, "grad_norm": 4.246219635009766, "learning_rate": 9.915807495345682e-06, "loss": 1.1592, "step": 1126 }, { "epoch": 0.08687943262411348, "grad_norm": 3.7990305423736572, "learning_rate": 9.915579202366393e-06, "loss": 1.2351, "step": 1127 }, { "epoch": 
0.08695652173913043, "grad_norm": 3.929375648498535, "learning_rate": 9.915350602926198e-06, "loss": 1.1652, "step": 1128 }, { "epoch": 0.08703361085414739, "grad_norm": 3.9628517627716064, "learning_rate": 9.915121697039352e-06, "loss": 1.0718, "step": 1129 }, { "epoch": 0.08711069996916436, "grad_norm": 4.691619396209717, "learning_rate": 9.914892484720124e-06, "loss": 1.2074, "step": 1130 }, { "epoch": 0.08718778908418132, "grad_norm": 3.8248648643493652, "learning_rate": 9.914662965982803e-06, "loss": 1.0582, "step": 1131 }, { "epoch": 0.08726487819919827, "grad_norm": 4.224147319793701, "learning_rate": 9.914433140841702e-06, "loss": 1.1567, "step": 1132 }, { "epoch": 0.08734196731421523, "grad_norm": 4.157731533050537, "learning_rate": 9.914203009311146e-06, "loss": 1.1007, "step": 1133 }, { "epoch": 0.08741905642923219, "grad_norm": 4.48982048034668, "learning_rate": 9.913972571405482e-06, "loss": 1.3227, "step": 1134 }, { "epoch": 0.08749614554424916, "grad_norm": 3.823336362838745, "learning_rate": 9.913741827139081e-06, "loss": 1.2045, "step": 1135 }, { "epoch": 0.08757323465926611, "grad_norm": 4.46881103515625, "learning_rate": 9.913510776526324e-06, "loss": 1.2047, "step": 1136 }, { "epoch": 0.08765032377428307, "grad_norm": 4.13620662689209, "learning_rate": 9.913279419581619e-06, "loss": 1.2527, "step": 1137 }, { "epoch": 0.08772741288930003, "grad_norm": 3.984325408935547, "learning_rate": 9.913047756319388e-06, "loss": 1.1912, "step": 1138 }, { "epoch": 0.08780450200431698, "grad_norm": 4.111431121826172, "learning_rate": 9.912815786754075e-06, "loss": 1.2131, "step": 1139 }, { "epoch": 0.08788159111933395, "grad_norm": 3.925600290298462, "learning_rate": 9.912583510900142e-06, "loss": 1.0794, "step": 1140 }, { "epoch": 0.08795868023435091, "grad_norm": 3.7339839935302734, "learning_rate": 9.91235092877207e-06, "loss": 1.1662, "step": 1141 }, { "epoch": 0.08803576934936787, "grad_norm": 3.6147632598876953, "learning_rate": 9.912118040384358e-06, "loss": 1.0365, "step": 1142 }, { "epoch": 0.08811285846438482, "grad_norm": 4.1122355461120605, "learning_rate": 9.911884845751529e-06, "loss": 1.2303, "step": 1143 }, { "epoch": 0.0881899475794018, "grad_norm": 4.269367218017578, "learning_rate": 9.911651344888117e-06, "loss": 1.217, "step": 1144 }, { "epoch": 0.08826703669441875, "grad_norm": 3.916430711746216, "learning_rate": 9.911417537808684e-06, "loss": 1.1381, "step": 1145 }, { "epoch": 0.08834412580943571, "grad_norm": 3.4542276859283447, "learning_rate": 9.911183424527802e-06, "loss": 1.0333, "step": 1146 }, { "epoch": 0.08842121492445267, "grad_norm": 3.716357707977295, "learning_rate": 9.91094900506007e-06, "loss": 1.228, "step": 1147 }, { "epoch": 0.08849830403946962, "grad_norm": 4.024401664733887, "learning_rate": 9.910714279420103e-06, "loss": 1.1965, "step": 1148 }, { "epoch": 0.08857539315448659, "grad_norm": 3.9577510356903076, "learning_rate": 9.910479247622534e-06, "loss": 1.1488, "step": 1149 }, { "epoch": 0.08865248226950355, "grad_norm": 4.212368011474609, "learning_rate": 9.910243909682014e-06, "loss": 1.2137, "step": 1150 }, { "epoch": 0.0887295713845205, "grad_norm": 3.935518503189087, "learning_rate": 9.910008265613219e-06, "loss": 1.0983, "step": 1151 }, { "epoch": 0.08880666049953746, "grad_norm": 4.040730953216553, "learning_rate": 9.909772315430837e-06, "loss": 1.1936, "step": 1152 }, { "epoch": 0.08888374961455442, "grad_norm": 3.9304912090301514, "learning_rate": 9.90953605914958e-06, "loss": 1.2163, "step": 1153 }, { "epoch": 0.08896083872957139, 
"grad_norm": 3.890301465988159, "learning_rate": 9.909299496784177e-06, "loss": 1.2424, "step": 1154 }, { "epoch": 0.08903792784458835, "grad_norm": 3.953073263168335, "learning_rate": 9.909062628349375e-06, "loss": 1.2468, "step": 1155 }, { "epoch": 0.0891150169596053, "grad_norm": 4.048395156860352, "learning_rate": 9.908825453859944e-06, "loss": 1.2238, "step": 1156 }, { "epoch": 0.08919210607462226, "grad_norm": 3.6528916358947754, "learning_rate": 9.90858797333067e-06, "loss": 1.1587, "step": 1157 }, { "epoch": 0.08926919518963922, "grad_norm": 3.6531436443328857, "learning_rate": 9.908350186776357e-06, "loss": 1.1732, "step": 1158 }, { "epoch": 0.08934628430465619, "grad_norm": 4.267482280731201, "learning_rate": 9.908112094211831e-06, "loss": 1.259, "step": 1159 }, { "epoch": 0.08942337341967314, "grad_norm": 4.037517547607422, "learning_rate": 9.907873695651935e-06, "loss": 1.1787, "step": 1160 }, { "epoch": 0.0895004625346901, "grad_norm": 3.696422815322876, "learning_rate": 9.907634991111535e-06, "loss": 1.0961, "step": 1161 }, { "epoch": 0.08957755164970706, "grad_norm": 4.135885238647461, "learning_rate": 9.907395980605509e-06, "loss": 1.2558, "step": 1162 }, { "epoch": 0.08965464076472403, "grad_norm": 4.450069904327393, "learning_rate": 9.907156664148761e-06, "loss": 1.2009, "step": 1163 }, { "epoch": 0.08973172987974098, "grad_norm": 3.922731876373291, "learning_rate": 9.906917041756208e-06, "loss": 1.1875, "step": 1164 }, { "epoch": 0.08980881899475794, "grad_norm": 4.55860710144043, "learning_rate": 9.90667711344279e-06, "loss": 1.2364, "step": 1165 }, { "epoch": 0.0898859081097749, "grad_norm": 4.525522232055664, "learning_rate": 9.90643687922347e-06, "loss": 1.2342, "step": 1166 }, { "epoch": 0.08996299722479185, "grad_norm": 4.051623344421387, "learning_rate": 9.90619633911322e-06, "loss": 1.2236, "step": 1167 }, { "epoch": 0.09004008633980883, "grad_norm": 3.817911148071289, "learning_rate": 9.905955493127037e-06, "loss": 1.0846, "step": 1168 }, { "epoch": 0.09011717545482578, "grad_norm": 3.949352979660034, "learning_rate": 9.905714341279938e-06, "loss": 1.1237, "step": 1169 }, { "epoch": 0.09019426456984274, "grad_norm": 3.8742692470550537, "learning_rate": 9.905472883586958e-06, "loss": 1.0737, "step": 1170 }, { "epoch": 0.0902713536848597, "grad_norm": 4.435111999511719, "learning_rate": 9.905231120063149e-06, "loss": 1.3749, "step": 1171 }, { "epoch": 0.09034844279987665, "grad_norm": 3.793482780456543, "learning_rate": 9.904989050723583e-06, "loss": 1.1484, "step": 1172 }, { "epoch": 0.09042553191489362, "grad_norm": 4.095993995666504, "learning_rate": 9.904746675583356e-06, "loss": 1.1579, "step": 1173 }, { "epoch": 0.09050262102991058, "grad_norm": 4.051393032073975, "learning_rate": 9.904503994657574e-06, "loss": 1.1387, "step": 1174 }, { "epoch": 0.09057971014492754, "grad_norm": 3.8025002479553223, "learning_rate": 9.90426100796137e-06, "loss": 1.0579, "step": 1175 }, { "epoch": 0.09065679925994449, "grad_norm": 4.1807708740234375, "learning_rate": 9.904017715509893e-06, "loss": 1.2583, "step": 1176 }, { "epoch": 0.09073388837496145, "grad_norm": 3.890801191329956, "learning_rate": 9.90377411731831e-06, "loss": 1.1381, "step": 1177 }, { "epoch": 0.09081097748997842, "grad_norm": 3.741750955581665, "learning_rate": 9.903530213401806e-06, "loss": 1.1599, "step": 1178 }, { "epoch": 0.09088806660499538, "grad_norm": 4.385606288909912, "learning_rate": 9.903286003775592e-06, "loss": 1.2269, "step": 1179 }, { "epoch": 0.09096515572001233, "grad_norm": 
4.160192012786865, "learning_rate": 9.903041488454888e-06, "loss": 1.0707, "step": 1180 }, { "epoch": 0.09104224483502929, "grad_norm": 4.092873573303223, "learning_rate": 9.90279666745494e-06, "loss": 1.2221, "step": 1181 }, { "epoch": 0.09111933395004625, "grad_norm": 4.053955078125, "learning_rate": 9.902551540791016e-06, "loss": 1.1687, "step": 1182 }, { "epoch": 0.09119642306506322, "grad_norm": 4.5065789222717285, "learning_rate": 9.902306108478393e-06, "loss": 1.004, "step": 1183 }, { "epoch": 0.09127351218008017, "grad_norm": 4.21845006942749, "learning_rate": 9.902060370532371e-06, "loss": 1.2317, "step": 1184 }, { "epoch": 0.09135060129509713, "grad_norm": 3.860323190689087, "learning_rate": 9.901814326968276e-06, "loss": 1.1142, "step": 1185 }, { "epoch": 0.09142769041011409, "grad_norm": 4.450876712799072, "learning_rate": 9.901567977801444e-06, "loss": 1.1097, "step": 1186 }, { "epoch": 0.09150477952513106, "grad_norm": 3.953404664993286, "learning_rate": 9.901321323047235e-06, "loss": 1.0921, "step": 1187 }, { "epoch": 0.09158186864014801, "grad_norm": 3.6998918056488037, "learning_rate": 9.901074362721024e-06, "loss": 1.2101, "step": 1188 }, { "epoch": 0.09165895775516497, "grad_norm": 3.8323581218719482, "learning_rate": 9.900827096838213e-06, "loss": 1.1592, "step": 1189 }, { "epoch": 0.09173604687018193, "grad_norm": 3.705106496810913, "learning_rate": 9.900579525414213e-06, "loss": 1.1281, "step": 1190 }, { "epoch": 0.09181313598519888, "grad_norm": 3.9382426738739014, "learning_rate": 9.900331648464459e-06, "loss": 1.1911, "step": 1191 }, { "epoch": 0.09189022510021586, "grad_norm": 4.3277740478515625, "learning_rate": 9.900083466004409e-06, "loss": 1.2685, "step": 1192 }, { "epoch": 0.09196731421523281, "grad_norm": 3.8461456298828125, "learning_rate": 9.899834978049531e-06, "loss": 1.2131, "step": 1193 }, { "epoch": 0.09204440333024977, "grad_norm": 4.075659275054932, "learning_rate": 9.89958618461532e-06, "loss": 1.171, "step": 1194 }, { "epoch": 0.09212149244526673, "grad_norm": 3.7878012657165527, "learning_rate": 9.899337085717284e-06, "loss": 1.1431, "step": 1195 }, { "epoch": 0.09219858156028368, "grad_norm": 3.931734800338745, "learning_rate": 9.899087681370958e-06, "loss": 1.2347, "step": 1196 }, { "epoch": 0.09227567067530065, "grad_norm": 4.06639289855957, "learning_rate": 9.898837971591885e-06, "loss": 1.2163, "step": 1197 }, { "epoch": 0.09235275979031761, "grad_norm": 3.754260301589966, "learning_rate": 9.89858795639564e-06, "loss": 1.1521, "step": 1198 }, { "epoch": 0.09242984890533457, "grad_norm": 4.012813568115234, "learning_rate": 9.898337635797803e-06, "loss": 1.1976, "step": 1199 }, { "epoch": 0.09250693802035152, "grad_norm": 3.8789308071136475, "learning_rate": 9.898087009813985e-06, "loss": 1.1611, "step": 1200 }, { "epoch": 0.09258402713536848, "grad_norm": 3.8602101802825928, "learning_rate": 9.89783607845981e-06, "loss": 1.2561, "step": 1201 }, { "epoch": 0.09266111625038545, "grad_norm": 3.982560157775879, "learning_rate": 9.897584841750922e-06, "loss": 1.2404, "step": 1202 }, { "epoch": 0.0927382053654024, "grad_norm": 4.090329170227051, "learning_rate": 9.897333299702982e-06, "loss": 1.1678, "step": 1203 }, { "epoch": 0.09281529448041936, "grad_norm": 4.530256271362305, "learning_rate": 9.897081452331677e-06, "loss": 1.2437, "step": 1204 }, { "epoch": 0.09289238359543632, "grad_norm": 4.283809185028076, "learning_rate": 9.896829299652705e-06, "loss": 1.1977, "step": 1205 }, { "epoch": 0.09296947271045329, "grad_norm": 3.9148154258728027, 
"learning_rate": 9.896576841681792e-06, "loss": 1.2863, "step": 1206 }, { "epoch": 0.09304656182547025, "grad_norm": 3.6620705127716064, "learning_rate": 9.896324078434668e-06, "loss": 1.1627, "step": 1207 }, { "epoch": 0.0931236509404872, "grad_norm": 4.204239368438721, "learning_rate": 9.896071009927098e-06, "loss": 1.166, "step": 1208 }, { "epoch": 0.09320074005550416, "grad_norm": 4.137142658233643, "learning_rate": 9.895817636174857e-06, "loss": 1.2225, "step": 1209 }, { "epoch": 0.09327782917052112, "grad_norm": 4.27466344833374, "learning_rate": 9.895563957193744e-06, "loss": 1.1555, "step": 1210 }, { "epoch": 0.09335491828553809, "grad_norm": 4.381038188934326, "learning_rate": 9.89530997299957e-06, "loss": 1.2687, "step": 1211 }, { "epoch": 0.09343200740055504, "grad_norm": 3.764836549758911, "learning_rate": 9.895055683608175e-06, "loss": 1.0278, "step": 1212 }, { "epoch": 0.093509096515572, "grad_norm": 4.309133529663086, "learning_rate": 9.89480108903541e-06, "loss": 1.217, "step": 1213 }, { "epoch": 0.09358618563058896, "grad_norm": 4.008470058441162, "learning_rate": 9.894546189297148e-06, "loss": 1.1429, "step": 1214 }, { "epoch": 0.09366327474560592, "grad_norm": 3.7469325065612793, "learning_rate": 9.894290984409281e-06, "loss": 1.1331, "step": 1215 }, { "epoch": 0.09374036386062289, "grad_norm": 4.368958473205566, "learning_rate": 9.894035474387719e-06, "loss": 1.2149, "step": 1216 }, { "epoch": 0.09381745297563984, "grad_norm": 4.350530624389648, "learning_rate": 9.893779659248393e-06, "loss": 1.2107, "step": 1217 }, { "epoch": 0.0938945420906568, "grad_norm": 3.9404361248016357, "learning_rate": 9.893523539007248e-06, "loss": 1.1363, "step": 1218 }, { "epoch": 0.09397163120567376, "grad_norm": 4.558434009552002, "learning_rate": 9.893267113680257e-06, "loss": 1.1958, "step": 1219 }, { "epoch": 0.09404872032069071, "grad_norm": 4.263239860534668, "learning_rate": 9.893010383283404e-06, "loss": 1.2427, "step": 1220 }, { "epoch": 0.09412580943570768, "grad_norm": 4.081235408782959, "learning_rate": 9.892753347832695e-06, "loss": 1.1318, "step": 1221 }, { "epoch": 0.09420289855072464, "grad_norm": 3.902259111404419, "learning_rate": 9.892496007344155e-06, "loss": 1.225, "step": 1222 }, { "epoch": 0.0942799876657416, "grad_norm": 4.030986785888672, "learning_rate": 9.892238361833826e-06, "loss": 1.1857, "step": 1223 }, { "epoch": 0.09435707678075855, "grad_norm": 4.28933048248291, "learning_rate": 9.891980411317774e-06, "loss": 1.2686, "step": 1224 }, { "epoch": 0.09443416589577551, "grad_norm": 3.7828292846679688, "learning_rate": 9.89172215581208e-06, "loss": 1.1151, "step": 1225 }, { "epoch": 0.09451125501079248, "grad_norm": 3.7171690464019775, "learning_rate": 9.891463595332844e-06, "loss": 1.18, "step": 1226 }, { "epoch": 0.09458834412580944, "grad_norm": 4.4071478843688965, "learning_rate": 9.891204729896187e-06, "loss": 1.1709, "step": 1227 }, { "epoch": 0.0946654332408264, "grad_norm": 3.7841458320617676, "learning_rate": 9.890945559518247e-06, "loss": 1.234, "step": 1228 }, { "epoch": 0.09474252235584335, "grad_norm": 4.163311004638672, "learning_rate": 9.890686084215182e-06, "loss": 1.1784, "step": 1229 }, { "epoch": 0.09481961147086032, "grad_norm": 4.381045341491699, "learning_rate": 9.89042630400317e-06, "loss": 1.1428, "step": 1230 }, { "epoch": 0.09489670058587728, "grad_norm": 4.111490726470947, "learning_rate": 9.890166218898405e-06, "loss": 1.2638, "step": 1231 }, { "epoch": 0.09497378970089423, "grad_norm": 4.06538200378418, "learning_rate": 
9.889905828917103e-06, "loss": 1.2292, "step": 1232 }, { "epoch": 0.09505087881591119, "grad_norm": 3.760375499725342, "learning_rate": 9.8896451340755e-06, "loss": 1.1367, "step": 1233 }, { "epoch": 0.09512796793092815, "grad_norm": 3.769625186920166, "learning_rate": 9.889384134389844e-06, "loss": 1.2137, "step": 1234 }, { "epoch": 0.09520505704594512, "grad_norm": 4.986996650695801, "learning_rate": 9.889122829876412e-06, "loss": 1.2135, "step": 1235 }, { "epoch": 0.09528214616096208, "grad_norm": 3.983733654022217, "learning_rate": 9.888861220551494e-06, "loss": 1.1738, "step": 1236 }, { "epoch": 0.09535923527597903, "grad_norm": 3.9600632190704346, "learning_rate": 9.888599306431397e-06, "loss": 1.0924, "step": 1237 }, { "epoch": 0.09543632439099599, "grad_norm": 3.82342791557312, "learning_rate": 9.888337087532452e-06, "loss": 1.2129, "step": 1238 }, { "epoch": 0.09551341350601295, "grad_norm": 4.06931734085083, "learning_rate": 9.888074563871007e-06, "loss": 1.0888, "step": 1239 }, { "epoch": 0.09559050262102992, "grad_norm": 3.582798719406128, "learning_rate": 9.88781173546343e-06, "loss": 1.0759, "step": 1240 }, { "epoch": 0.09566759173604687, "grad_norm": 4.151041507720947, "learning_rate": 9.887548602326104e-06, "loss": 1.09, "step": 1241 }, { "epoch": 0.09574468085106383, "grad_norm": 4.339078426361084, "learning_rate": 9.887285164475438e-06, "loss": 1.1669, "step": 1242 }, { "epoch": 0.09582176996608079, "grad_norm": 4.104028701782227, "learning_rate": 9.887021421927853e-06, "loss": 1.1578, "step": 1243 }, { "epoch": 0.09589885908109774, "grad_norm": 4.224393844604492, "learning_rate": 9.886757374699792e-06, "loss": 1.1235, "step": 1244 }, { "epoch": 0.09597594819611471, "grad_norm": 3.995770215988159, "learning_rate": 9.88649302280772e-06, "loss": 1.1039, "step": 1245 }, { "epoch": 0.09605303731113167, "grad_norm": 4.252878665924072, "learning_rate": 9.886228366268114e-06, "loss": 1.1712, "step": 1246 }, { "epoch": 0.09613012642614863, "grad_norm": 4.067622661590576, "learning_rate": 9.885963405097477e-06, "loss": 1.2701, "step": 1247 }, { "epoch": 0.09620721554116558, "grad_norm": 4.1838765144348145, "learning_rate": 9.885698139312326e-06, "loss": 1.189, "step": 1248 }, { "epoch": 0.09628430465618255, "grad_norm": 3.9121227264404297, "learning_rate": 9.8854325689292e-06, "loss": 1.1588, "step": 1249 }, { "epoch": 0.09636139377119951, "grad_norm": 4.973738193511963, "learning_rate": 9.885166693964654e-06, "loss": 1.3419, "step": 1250 }, { "epoch": 0.09643848288621647, "grad_norm": 3.7892560958862305, "learning_rate": 9.884900514435266e-06, "loss": 1.1613, "step": 1251 }, { "epoch": 0.09651557200123342, "grad_norm": 3.8883039951324463, "learning_rate": 9.884634030357634e-06, "loss": 1.0897, "step": 1252 }, { "epoch": 0.09659266111625038, "grad_norm": 3.8611433506011963, "learning_rate": 9.884367241748364e-06, "loss": 1.1545, "step": 1253 }, { "epoch": 0.09666975023126735, "grad_norm": 3.8657636642456055, "learning_rate": 9.884100148624096e-06, "loss": 1.2268, "step": 1254 }, { "epoch": 0.09674683934628431, "grad_norm": 3.610783576965332, "learning_rate": 9.883832751001479e-06, "loss": 1.0718, "step": 1255 }, { "epoch": 0.09682392846130126, "grad_norm": 3.7750887870788574, "learning_rate": 9.883565048897183e-06, "loss": 1.1832, "step": 1256 }, { "epoch": 0.09690101757631822, "grad_norm": 4.035364627838135, "learning_rate": 9.883297042327899e-06, "loss": 1.2015, "step": 1257 }, { "epoch": 0.09697810669133518, "grad_norm": 3.7171030044555664, "learning_rate": 
9.883028731310335e-06, "loss": 1.1283, "step": 1258 }, { "epoch": 0.09705519580635215, "grad_norm": 3.9022812843322754, "learning_rate": 9.88276011586122e-06, "loss": 1.18, "step": 1259 }, { "epoch": 0.0971322849213691, "grad_norm": 4.099169731140137, "learning_rate": 9.882491195997301e-06, "loss": 1.1186, "step": 1260 }, { "epoch": 0.09720937403638606, "grad_norm": 4.0367326736450195, "learning_rate": 9.882221971735343e-06, "loss": 1.1546, "step": 1261 }, { "epoch": 0.09728646315140302, "grad_norm": 3.8437790870666504, "learning_rate": 9.88195244309213e-06, "loss": 1.1853, "step": 1262 }, { "epoch": 0.09736355226641998, "grad_norm": 3.9312708377838135, "learning_rate": 9.881682610084467e-06, "loss": 1.2333, "step": 1263 }, { "epoch": 0.09744064138143695, "grad_norm": 4.007068157196045, "learning_rate": 9.881412472729175e-06, "loss": 1.1373, "step": 1264 }, { "epoch": 0.0975177304964539, "grad_norm": 4.4400763511657715, "learning_rate": 9.881142031043098e-06, "loss": 1.3182, "step": 1265 }, { "epoch": 0.09759481961147086, "grad_norm": 4.121272563934326, "learning_rate": 9.880871285043095e-06, "loss": 1.301, "step": 1266 }, { "epoch": 0.09767190872648782, "grad_norm": 3.7698986530303955, "learning_rate": 9.880600234746047e-06, "loss": 1.132, "step": 1267 }, { "epoch": 0.09774899784150477, "grad_norm": 4.014803886413574, "learning_rate": 9.88032888016885e-06, "loss": 1.1003, "step": 1268 }, { "epoch": 0.09782608695652174, "grad_norm": 3.873784303665161, "learning_rate": 9.880057221328425e-06, "loss": 1.1009, "step": 1269 }, { "epoch": 0.0979031760715387, "grad_norm": 4.115602970123291, "learning_rate": 9.879785258241705e-06, "loss": 1.3411, "step": 1270 }, { "epoch": 0.09798026518655566, "grad_norm": 3.987370491027832, "learning_rate": 9.879512990925648e-06, "loss": 1.1694, "step": 1271 }, { "epoch": 0.09805735430157261, "grad_norm": 3.914703607559204, "learning_rate": 9.879240419397227e-06, "loss": 1.2862, "step": 1272 }, { "epoch": 0.09813444341658958, "grad_norm": 4.038669586181641, "learning_rate": 9.878967543673436e-06, "loss": 1.2318, "step": 1273 }, { "epoch": 0.09821153253160654, "grad_norm": 3.7776105403900146, "learning_rate": 9.878694363771289e-06, "loss": 1.0864, "step": 1274 }, { "epoch": 0.0982886216466235, "grad_norm": 3.926527976989746, "learning_rate": 9.878420879707816e-06, "loss": 1.1557, "step": 1275 }, { "epoch": 0.09836571076164045, "grad_norm": 4.13185977935791, "learning_rate": 9.878147091500065e-06, "loss": 1.2331, "step": 1276 }, { "epoch": 0.09844279987665741, "grad_norm": 4.489853858947754, "learning_rate": 9.877872999165109e-06, "loss": 1.2494, "step": 1277 }, { "epoch": 0.09851988899167438, "grad_norm": 4.077756404876709, "learning_rate": 9.877598602720034e-06, "loss": 1.1705, "step": 1278 }, { "epoch": 0.09859697810669134, "grad_norm": 4.43337345123291, "learning_rate": 9.877323902181949e-06, "loss": 1.1507, "step": 1279 }, { "epoch": 0.0986740672217083, "grad_norm": 4.278952598571777, "learning_rate": 9.877048897567977e-06, "loss": 1.1005, "step": 1280 }, { "epoch": 0.09875115633672525, "grad_norm": 3.6082239151000977, "learning_rate": 9.876773588895265e-06, "loss": 1.1287, "step": 1281 }, { "epoch": 0.09882824545174221, "grad_norm": 3.5894458293914795, "learning_rate": 9.876497976180978e-06, "loss": 1.1771, "step": 1282 }, { "epoch": 0.09890533456675918, "grad_norm": 4.010733127593994, "learning_rate": 9.8762220594423e-06, "loss": 1.0699, "step": 1283 }, { "epoch": 0.09898242368177614, "grad_norm": 3.891744613647461, "learning_rate": 9.87594583869643e-06, 
"loss": 1.1908, "step": 1284 }, { "epoch": 0.09905951279679309, "grad_norm": 4.345311164855957, "learning_rate": 9.875669313960588e-06, "loss": 1.2161, "step": 1285 }, { "epoch": 0.09913660191181005, "grad_norm": 3.8743138313293457, "learning_rate": 9.87539248525202e-06, "loss": 1.2969, "step": 1286 }, { "epoch": 0.099213691026827, "grad_norm": 4.233188152313232, "learning_rate": 9.875115352587977e-06, "loss": 1.1773, "step": 1287 }, { "epoch": 0.09929078014184398, "grad_norm": 3.835361957550049, "learning_rate": 9.874837915985743e-06, "loss": 1.2376, "step": 1288 }, { "epoch": 0.09936786925686093, "grad_norm": 3.9066238403320312, "learning_rate": 9.874560175462612e-06, "loss": 1.2814, "step": 1289 }, { "epoch": 0.09944495837187789, "grad_norm": 3.9215028285980225, "learning_rate": 9.874282131035899e-06, "loss": 1.1192, "step": 1290 }, { "epoch": 0.09952204748689485, "grad_norm": 3.68405818939209, "learning_rate": 9.87400378272294e-06, "loss": 1.2173, "step": 1291 }, { "epoch": 0.09959913660191182, "grad_norm": 3.8745553493499756, "learning_rate": 9.87372513054109e-06, "loss": 1.1877, "step": 1292 }, { "epoch": 0.09967622571692877, "grad_norm": 4.057330131530762, "learning_rate": 9.873446174507719e-06, "loss": 1.2889, "step": 1293 }, { "epoch": 0.09975331483194573, "grad_norm": 4.208259105682373, "learning_rate": 9.873166914640218e-06, "loss": 1.1993, "step": 1294 }, { "epoch": 0.09983040394696269, "grad_norm": 3.965298652648926, "learning_rate": 9.872887350956e-06, "loss": 1.1844, "step": 1295 }, { "epoch": 0.09990749306197964, "grad_norm": 4.23174524307251, "learning_rate": 9.872607483472491e-06, "loss": 1.1607, "step": 1296 }, { "epoch": 0.09998458217699661, "grad_norm": 4.803770542144775, "learning_rate": 9.872327312207145e-06, "loss": 1.2843, "step": 1297 }, { "epoch": 0.10006167129201357, "grad_norm": 4.673795223236084, "learning_rate": 9.872046837177421e-06, "loss": 1.2134, "step": 1298 }, { "epoch": 0.10013876040703053, "grad_norm": 3.8976125717163086, "learning_rate": 9.871766058400812e-06, "loss": 1.1635, "step": 1299 }, { "epoch": 0.10021584952204748, "grad_norm": 4.471933841705322, "learning_rate": 9.87148497589482e-06, "loss": 1.1844, "step": 1300 }, { "epoch": 0.10029293863706444, "grad_norm": 4.1576457023620605, "learning_rate": 9.871203589676971e-06, "loss": 1.1461, "step": 1301 }, { "epoch": 0.10037002775208141, "grad_norm": 3.978757381439209, "learning_rate": 9.870921899764807e-06, "loss": 1.2684, "step": 1302 }, { "epoch": 0.10044711686709837, "grad_norm": 3.5319221019744873, "learning_rate": 9.87063990617589e-06, "loss": 1.1454, "step": 1303 }, { "epoch": 0.10052420598211532, "grad_norm": 4.05499267578125, "learning_rate": 9.870357608927798e-06, "loss": 1.1962, "step": 1304 }, { "epoch": 0.10060129509713228, "grad_norm": 4.118753433227539, "learning_rate": 9.870075008038137e-06, "loss": 1.1875, "step": 1305 }, { "epoch": 0.10067838421214924, "grad_norm": 3.8087265491485596, "learning_rate": 9.869792103524517e-06, "loss": 1.1726, "step": 1306 }, { "epoch": 0.10075547332716621, "grad_norm": 4.719503879547119, "learning_rate": 9.869508895404584e-06, "loss": 1.1398, "step": 1307 }, { "epoch": 0.10083256244218317, "grad_norm": 4.060819625854492, "learning_rate": 9.86922538369599e-06, "loss": 1.2485, "step": 1308 }, { "epoch": 0.10090965155720012, "grad_norm": 4.301494121551514, "learning_rate": 9.868941568416413e-06, "loss": 1.1975, "step": 1309 }, { "epoch": 0.10098674067221708, "grad_norm": 3.7967967987060547, "learning_rate": 9.868657449583547e-06, "loss": 1.1155, "step": 
1310 }, { "epoch": 0.10106382978723404, "grad_norm": 5.051084995269775, "learning_rate": 9.8683730272151e-06, "loss": 1.2833, "step": 1311 }, { "epoch": 0.101140918902251, "grad_norm": 5.0358381271362305, "learning_rate": 9.868088301328813e-06, "loss": 1.358, "step": 1312 }, { "epoch": 0.10121800801726796, "grad_norm": 4.280052185058594, "learning_rate": 9.867803271942432e-06, "loss": 1.2046, "step": 1313 }, { "epoch": 0.10129509713228492, "grad_norm": 3.971712350845337, "learning_rate": 9.867517939073727e-06, "loss": 1.1826, "step": 1314 }, { "epoch": 0.10137218624730188, "grad_norm": 4.396337985992432, "learning_rate": 9.867232302740489e-06, "loss": 1.2291, "step": 1315 }, { "epoch": 0.10144927536231885, "grad_norm": 3.7291693687438965, "learning_rate": 9.866946362960526e-06, "loss": 1.2395, "step": 1316 }, { "epoch": 0.1015263644773358, "grad_norm": 3.65598726272583, "learning_rate": 9.86666011975166e-06, "loss": 1.2252, "step": 1317 }, { "epoch": 0.10160345359235276, "grad_norm": 3.8488965034484863, "learning_rate": 9.866373573131744e-06, "loss": 1.132, "step": 1318 }, { "epoch": 0.10168054270736972, "grad_norm": 4.217064380645752, "learning_rate": 9.86608672311864e-06, "loss": 1.2116, "step": 1319 }, { "epoch": 0.10175763182238667, "grad_norm": 3.518402576446533, "learning_rate": 9.865799569730227e-06, "loss": 1.0713, "step": 1320 }, { "epoch": 0.10183472093740364, "grad_norm": 4.360300064086914, "learning_rate": 9.865512112984414e-06, "loss": 1.2738, "step": 1321 }, { "epoch": 0.1019118100524206, "grad_norm": 4.2740068435668945, "learning_rate": 9.86522435289912e-06, "loss": 1.3994, "step": 1322 }, { "epoch": 0.10198889916743756, "grad_norm": 4.366950511932373, "learning_rate": 9.864936289492285e-06, "loss": 1.2464, "step": 1323 }, { "epoch": 0.10206598828245451, "grad_norm": 4.6967997550964355, "learning_rate": 9.864647922781868e-06, "loss": 1.2328, "step": 1324 }, { "epoch": 0.10214307739747147, "grad_norm": 3.914935827255249, "learning_rate": 9.864359252785847e-06, "loss": 1.2629, "step": 1325 }, { "epoch": 0.10222016651248844, "grad_norm": 3.91147780418396, "learning_rate": 9.864070279522222e-06, "loss": 1.1347, "step": 1326 }, { "epoch": 0.1022972556275054, "grad_norm": 3.9424116611480713, "learning_rate": 9.863781003009005e-06, "loss": 1.1586, "step": 1327 }, { "epoch": 0.10237434474252236, "grad_norm": 4.317541122436523, "learning_rate": 9.863491423264232e-06, "loss": 1.2135, "step": 1328 }, { "epoch": 0.10245143385753931, "grad_norm": 3.8650455474853516, "learning_rate": 9.86320154030596e-06, "loss": 1.1562, "step": 1329 }, { "epoch": 0.10252852297255627, "grad_norm": 4.095459461212158, "learning_rate": 9.862911354152258e-06, "loss": 1.0685, "step": 1330 }, { "epoch": 0.10260561208757324, "grad_norm": 4.160573482513428, "learning_rate": 9.862620864821218e-06, "loss": 1.2398, "step": 1331 }, { "epoch": 0.1026827012025902, "grad_norm": 4.309733867645264, "learning_rate": 9.862330072330953e-06, "loss": 1.1783, "step": 1332 }, { "epoch": 0.10275979031760715, "grad_norm": 4.311089038848877, "learning_rate": 9.862038976699589e-06, "loss": 1.2346, "step": 1333 }, { "epoch": 0.10283687943262411, "grad_norm": 4.145449638366699, "learning_rate": 9.861747577945275e-06, "loss": 1.241, "step": 1334 }, { "epoch": 0.10291396854764108, "grad_norm": 4.2260236740112305, "learning_rate": 9.861455876086181e-06, "loss": 1.2695, "step": 1335 }, { "epoch": 0.10299105766265804, "grad_norm": 4.495214462280273, "learning_rate": 9.86116387114049e-06, "loss": 1.2767, "step": 1336 }, { "epoch": 
0.103068146777675, "grad_norm": 4.408441066741943, "learning_rate": 9.860871563126409e-06, "loss": 1.1875, "step": 1337 }, { "epoch": 0.10314523589269195, "grad_norm": 3.792508363723755, "learning_rate": 9.860578952062161e-06, "loss": 1.1621, "step": 1338 }, { "epoch": 0.1032223250077089, "grad_norm": 4.099066734313965, "learning_rate": 9.86028603796599e-06, "loss": 1.244, "step": 1339 }, { "epoch": 0.10329941412272588, "grad_norm": 3.596609354019165, "learning_rate": 9.859992820856155e-06, "loss": 1.151, "step": 1340 }, { "epoch": 0.10337650323774283, "grad_norm": 4.01649808883667, "learning_rate": 9.859699300750937e-06, "loss": 1.1552, "step": 1341 }, { "epoch": 0.10345359235275979, "grad_norm": 3.891861915588379, "learning_rate": 9.85940547766864e-06, "loss": 1.1511, "step": 1342 }, { "epoch": 0.10353068146777675, "grad_norm": 3.784027576446533, "learning_rate": 9.859111351627576e-06, "loss": 1.0498, "step": 1343 }, { "epoch": 0.1036077705827937, "grad_norm": 3.873615026473999, "learning_rate": 9.858816922646088e-06, "loss": 1.1498, "step": 1344 }, { "epoch": 0.10368485969781067, "grad_norm": 4.082927227020264, "learning_rate": 9.858522190742529e-06, "loss": 1.2154, "step": 1345 }, { "epoch": 0.10376194881282763, "grad_norm": 3.9876515865325928, "learning_rate": 9.858227155935271e-06, "loss": 1.1871, "step": 1346 }, { "epoch": 0.10383903792784459, "grad_norm": 3.943120241165161, "learning_rate": 9.857931818242715e-06, "loss": 1.0969, "step": 1347 }, { "epoch": 0.10391612704286154, "grad_norm": 4.118403434753418, "learning_rate": 9.857636177683267e-06, "loss": 1.0838, "step": 1348 }, { "epoch": 0.1039932161578785, "grad_norm": 3.8626832962036133, "learning_rate": 9.857340234275363e-06, "loss": 1.1041, "step": 1349 }, { "epoch": 0.10407030527289547, "grad_norm": 3.960684299468994, "learning_rate": 9.857043988037453e-06, "loss": 1.1214, "step": 1350 }, { "epoch": 0.10414739438791243, "grad_norm": 3.8273346424102783, "learning_rate": 9.856747438988005e-06, "loss": 1.1456, "step": 1351 }, { "epoch": 0.10422448350292939, "grad_norm": 4.401994705200195, "learning_rate": 9.856450587145507e-06, "loss": 1.1389, "step": 1352 }, { "epoch": 0.10430157261794634, "grad_norm": 4.069916248321533, "learning_rate": 9.856153432528467e-06, "loss": 1.2821, "step": 1353 }, { "epoch": 0.1043786617329633, "grad_norm": 3.9717392921447754, "learning_rate": 9.855855975155414e-06, "loss": 1.1286, "step": 1354 }, { "epoch": 0.10445575084798027, "grad_norm": 3.949397087097168, "learning_rate": 9.855558215044887e-06, "loss": 1.1939, "step": 1355 }, { "epoch": 0.10453283996299723, "grad_norm": 3.813549757003784, "learning_rate": 9.855260152215455e-06, "loss": 1.069, "step": 1356 }, { "epoch": 0.10460992907801418, "grad_norm": 4.000561237335205, "learning_rate": 9.854961786685697e-06, "loss": 1.1664, "step": 1357 }, { "epoch": 0.10468701819303114, "grad_norm": 4.400665283203125, "learning_rate": 9.854663118474217e-06, "loss": 1.1951, "step": 1358 }, { "epoch": 0.10476410730804811, "grad_norm": 4.009283542633057, "learning_rate": 9.854364147599635e-06, "loss": 1.1911, "step": 1359 }, { "epoch": 0.10484119642306507, "grad_norm": 4.075754642486572, "learning_rate": 9.854064874080589e-06, "loss": 1.2182, "step": 1360 }, { "epoch": 0.10491828553808202, "grad_norm": 3.848576784133911, "learning_rate": 9.853765297935738e-06, "loss": 1.1876, "step": 1361 }, { "epoch": 0.10499537465309898, "grad_norm": 4.087264060974121, "learning_rate": 9.853465419183759e-06, "loss": 1.2082, "step": 1362 }, { "epoch": 0.10507246376811594, 
"grad_norm": 4.3004913330078125, "learning_rate": 9.853165237843347e-06, "loss": 1.1629, "step": 1363 }, { "epoch": 0.10514955288313291, "grad_norm": 3.889831781387329, "learning_rate": 9.852864753933218e-06, "loss": 1.1833, "step": 1364 }, { "epoch": 0.10522664199814986, "grad_norm": 3.902057409286499, "learning_rate": 9.852563967472106e-06, "loss": 1.2362, "step": 1365 }, { "epoch": 0.10530373111316682, "grad_norm": 4.216920852661133, "learning_rate": 9.852262878478762e-06, "loss": 1.2058, "step": 1366 }, { "epoch": 0.10538082022818378, "grad_norm": 3.6932597160339355, "learning_rate": 9.851961486971959e-06, "loss": 1.133, "step": 1367 }, { "epoch": 0.10545790934320073, "grad_norm": 3.947735071182251, "learning_rate": 9.851659792970485e-06, "loss": 1.2308, "step": 1368 }, { "epoch": 0.1055349984582177, "grad_norm": 3.9944536685943604, "learning_rate": 9.85135779649315e-06, "loss": 1.1327, "step": 1369 }, { "epoch": 0.10561208757323466, "grad_norm": 3.8380393981933594, "learning_rate": 9.851055497558783e-06, "loss": 1.2182, "step": 1370 }, { "epoch": 0.10568917668825162, "grad_norm": 3.927506446838379, "learning_rate": 9.850752896186231e-06, "loss": 1.1246, "step": 1371 }, { "epoch": 0.10576626580326857, "grad_norm": 3.877490520477295, "learning_rate": 9.850449992394357e-06, "loss": 1.1259, "step": 1372 }, { "epoch": 0.10584335491828553, "grad_norm": 3.9563748836517334, "learning_rate": 9.850146786202048e-06, "loss": 1.2217, "step": 1373 }, { "epoch": 0.1059204440333025, "grad_norm": 3.84112811088562, "learning_rate": 9.849843277628206e-06, "loss": 1.1055, "step": 1374 }, { "epoch": 0.10599753314831946, "grad_norm": 4.415043830871582, "learning_rate": 9.849539466691755e-06, "loss": 1.1178, "step": 1375 }, { "epoch": 0.10607462226333642, "grad_norm": 3.7989046573638916, "learning_rate": 9.849235353411632e-06, "loss": 1.2218, "step": 1376 }, { "epoch": 0.10615171137835337, "grad_norm": 4.221613883972168, "learning_rate": 9.848930937806802e-06, "loss": 1.3153, "step": 1377 }, { "epoch": 0.10622880049337034, "grad_norm": 3.621276378631592, "learning_rate": 9.84862621989624e-06, "loss": 1.18, "step": 1378 }, { "epoch": 0.1063058896083873, "grad_norm": 4.181283473968506, "learning_rate": 9.848321199698945e-06, "loss": 1.1736, "step": 1379 }, { "epoch": 0.10638297872340426, "grad_norm": 4.222801685333252, "learning_rate": 9.848015877233935e-06, "loss": 1.1519, "step": 1380 }, { "epoch": 0.10646006783842121, "grad_norm": 4.010768890380859, "learning_rate": 9.847710252520242e-06, "loss": 1.1414, "step": 1381 }, { "epoch": 0.10653715695343817, "grad_norm": 4.021345138549805, "learning_rate": 9.847404325576921e-06, "loss": 1.2202, "step": 1382 }, { "epoch": 0.10661424606845514, "grad_norm": 4.335690498352051, "learning_rate": 9.847098096423046e-06, "loss": 1.1836, "step": 1383 }, { "epoch": 0.1066913351834721, "grad_norm": 4.25598669052124, "learning_rate": 9.84679156507771e-06, "loss": 1.2209, "step": 1384 }, { "epoch": 0.10676842429848905, "grad_norm": 3.9418535232543945, "learning_rate": 9.84648473156002e-06, "loss": 1.2035, "step": 1385 }, { "epoch": 0.10684551341350601, "grad_norm": 4.38209867477417, "learning_rate": 9.84617759588911e-06, "loss": 1.2745, "step": 1386 }, { "epoch": 0.10692260252852297, "grad_norm": 3.9577906131744385, "learning_rate": 9.845870158084123e-06, "loss": 1.2592, "step": 1387 }, { "epoch": 0.10699969164353994, "grad_norm": 3.4693827629089355, "learning_rate": 9.845562418164232e-06, "loss": 1.1585, "step": 1388 }, { "epoch": 0.1070767807585569, "grad_norm": 
3.7138640880584717, "learning_rate": 9.845254376148617e-06, "loss": 1.1364, "step": 1389 }, { "epoch": 0.10715386987357385, "grad_norm": 3.9818434715270996, "learning_rate": 9.844946032056487e-06, "loss": 1.0948, "step": 1390 }, { "epoch": 0.10723095898859081, "grad_norm": 3.7591545581817627, "learning_rate": 9.844637385907066e-06, "loss": 1.0582, "step": 1391 }, { "epoch": 0.10730804810360776, "grad_norm": 3.9569385051727295, "learning_rate": 9.844328437719595e-06, "loss": 1.1992, "step": 1392 }, { "epoch": 0.10738513721862473, "grad_norm": 3.6534078121185303, "learning_rate": 9.844019187513335e-06, "loss": 1.1582, "step": 1393 }, { "epoch": 0.10746222633364169, "grad_norm": 3.8868963718414307, "learning_rate": 9.843709635307563e-06, "loss": 1.2037, "step": 1394 }, { "epoch": 0.10753931544865865, "grad_norm": 4.207004070281982, "learning_rate": 9.843399781121585e-06, "loss": 1.2641, "step": 1395 }, { "epoch": 0.1076164045636756, "grad_norm": 3.9298391342163086, "learning_rate": 9.843089624974716e-06, "loss": 1.1404, "step": 1396 }, { "epoch": 0.10769349367869256, "grad_norm": 3.9258861541748047, "learning_rate": 9.84277916688629e-06, "loss": 1.184, "step": 1397 }, { "epoch": 0.10777058279370953, "grad_norm": 3.8614659309387207, "learning_rate": 9.842468406875665e-06, "loss": 1.2738, "step": 1398 }, { "epoch": 0.10784767190872649, "grad_norm": 4.084595680236816, "learning_rate": 9.842157344962214e-06, "loss": 1.2203, "step": 1399 }, { "epoch": 0.10792476102374345, "grad_norm": 3.8629918098449707, "learning_rate": 9.84184598116533e-06, "loss": 1.1434, "step": 1400 }, { "epoch": 0.1080018501387604, "grad_norm": 3.8280506134033203, "learning_rate": 9.841534315504427e-06, "loss": 1.1438, "step": 1401 }, { "epoch": 0.10807893925377737, "grad_norm": 4.113556861877441, "learning_rate": 9.841222347998933e-06, "loss": 1.2824, "step": 1402 }, { "epoch": 0.10815602836879433, "grad_norm": 4.399749279022217, "learning_rate": 9.8409100786683e-06, "loss": 1.178, "step": 1403 }, { "epoch": 0.10823311748381129, "grad_norm": 4.358874320983887, "learning_rate": 9.840597507531997e-06, "loss": 1.2722, "step": 1404 }, { "epoch": 0.10831020659882824, "grad_norm": 3.7543954849243164, "learning_rate": 9.840284634609508e-06, "loss": 1.1464, "step": 1405 }, { "epoch": 0.1083872957138452, "grad_norm": 4.325103282928467, "learning_rate": 9.839971459920338e-06, "loss": 1.1934, "step": 1406 }, { "epoch": 0.10846438482886217, "grad_norm": 4.050751209259033, "learning_rate": 9.839657983484018e-06, "loss": 1.3296, "step": 1407 }, { "epoch": 0.10854147394387913, "grad_norm": 4.21981954574585, "learning_rate": 9.839344205320088e-06, "loss": 1.2409, "step": 1408 }, { "epoch": 0.10861856305889608, "grad_norm": 4.275995254516602, "learning_rate": 9.839030125448108e-06, "loss": 1.3003, "step": 1409 }, { "epoch": 0.10869565217391304, "grad_norm": 4.038300514221191, "learning_rate": 9.838715743887662e-06, "loss": 1.1667, "step": 1410 }, { "epoch": 0.10877274128893, "grad_norm": 3.92463755607605, "learning_rate": 9.838401060658352e-06, "loss": 1.1469, "step": 1411 }, { "epoch": 0.10884983040394697, "grad_norm": 4.041322231292725, "learning_rate": 9.83808607577979e-06, "loss": 1.2825, "step": 1412 }, { "epoch": 0.10892691951896392, "grad_norm": 4.066822528839111, "learning_rate": 9.837770789271623e-06, "loss": 1.1624, "step": 1413 }, { "epoch": 0.10900400863398088, "grad_norm": 3.8004565238952637, "learning_rate": 9.8374552011535e-06, "loss": 1.1102, "step": 1414 }, { "epoch": 0.10908109774899784, "grad_norm": 3.726020336151123, 
"learning_rate": 9.837139311445102e-06, "loss": 1.1266, "step": 1415 }, { "epoch": 0.1091581868640148, "grad_norm": 4.26671028137207, "learning_rate": 9.836823120166116e-06, "loss": 1.2413, "step": 1416 }, { "epoch": 0.10923527597903177, "grad_norm": 3.6223459243774414, "learning_rate": 9.836506627336261e-06, "loss": 1.1612, "step": 1417 }, { "epoch": 0.10931236509404872, "grad_norm": 3.5984158515930176, "learning_rate": 9.836189832975267e-06, "loss": 1.0419, "step": 1418 }, { "epoch": 0.10938945420906568, "grad_norm": 3.670827627182007, "learning_rate": 9.835872737102886e-06, "loss": 1.2345, "step": 1419 }, { "epoch": 0.10946654332408264, "grad_norm": 3.62565541267395, "learning_rate": 9.835555339738882e-06, "loss": 1.0729, "step": 1420 }, { "epoch": 0.1095436324390996, "grad_norm": 4.1612958908081055, "learning_rate": 9.83523764090305e-06, "loss": 1.2735, "step": 1421 }, { "epoch": 0.10962072155411656, "grad_norm": 3.878818988800049, "learning_rate": 9.83491964061519e-06, "loss": 1.2431, "step": 1422 }, { "epoch": 0.10969781066913352, "grad_norm": 3.963610887527466, "learning_rate": 9.834601338895133e-06, "loss": 1.2172, "step": 1423 }, { "epoch": 0.10977489978415048, "grad_norm": 4.701980113983154, "learning_rate": 9.83428273576272e-06, "loss": 1.2959, "step": 1424 }, { "epoch": 0.10985198889916743, "grad_norm": 4.165730953216553, "learning_rate": 9.833963831237819e-06, "loss": 1.1758, "step": 1425 }, { "epoch": 0.1099290780141844, "grad_norm": 4.231865882873535, "learning_rate": 9.833644625340305e-06, "loss": 1.1345, "step": 1426 }, { "epoch": 0.11000616712920136, "grad_norm": 3.8427863121032715, "learning_rate": 9.833325118090086e-06, "loss": 1.1748, "step": 1427 }, { "epoch": 0.11008325624421832, "grad_norm": 4.339493274688721, "learning_rate": 9.833005309507076e-06, "loss": 1.2364, "step": 1428 }, { "epoch": 0.11016034535923527, "grad_norm": 3.940251350402832, "learning_rate": 9.832685199611217e-06, "loss": 1.1587, "step": 1429 }, { "epoch": 0.11023743447425223, "grad_norm": 4.076071739196777, "learning_rate": 9.832364788422464e-06, "loss": 1.1959, "step": 1430 }, { "epoch": 0.1103145235892692, "grad_norm": 3.910961389541626, "learning_rate": 9.832044075960795e-06, "loss": 1.0885, "step": 1431 }, { "epoch": 0.11039161270428616, "grad_norm": 4.00604772567749, "learning_rate": 9.831723062246204e-06, "loss": 1.2461, "step": 1432 }, { "epoch": 0.11046870181930311, "grad_norm": 4.331727981567383, "learning_rate": 9.831401747298702e-06, "loss": 1.2664, "step": 1433 }, { "epoch": 0.11054579093432007, "grad_norm": 4.105038642883301, "learning_rate": 9.831080131138325e-06, "loss": 1.1604, "step": 1434 }, { "epoch": 0.11062288004933703, "grad_norm": 4.079939365386963, "learning_rate": 9.830758213785123e-06, "loss": 1.1103, "step": 1435 }, { "epoch": 0.110699969164354, "grad_norm": 3.9948172569274902, "learning_rate": 9.830435995259165e-06, "loss": 1.1889, "step": 1436 }, { "epoch": 0.11077705827937095, "grad_norm": 4.091009616851807, "learning_rate": 9.830113475580541e-06, "loss": 1.1374, "step": 1437 }, { "epoch": 0.11085414739438791, "grad_norm": 4.397194862365723, "learning_rate": 9.829790654769356e-06, "loss": 1.1226, "step": 1438 }, { "epoch": 0.11093123650940487, "grad_norm": 4.208889961242676, "learning_rate": 9.829467532845738e-06, "loss": 1.1658, "step": 1439 }, { "epoch": 0.11100832562442182, "grad_norm": 4.262278079986572, "learning_rate": 9.829144109829832e-06, "loss": 1.3317, "step": 1440 }, { "epoch": 0.1110854147394388, "grad_norm": 4.373659133911133, "learning_rate": 
9.828820385741802e-06, "loss": 1.1994, "step": 1441 }, { "epoch": 0.11116250385445575, "grad_norm": 3.869906425476074, "learning_rate": 9.82849636060183e-06, "loss": 1.2039, "step": 1442 }, { "epoch": 0.11123959296947271, "grad_norm": 3.81036114692688, "learning_rate": 9.828172034430118e-06, "loss": 1.0779, "step": 1443 }, { "epoch": 0.11131668208448967, "grad_norm": 4.158944129943848, "learning_rate": 9.827847407246885e-06, "loss": 1.1907, "step": 1444 }, { "epoch": 0.11139377119950664, "grad_norm": 4.358891487121582, "learning_rate": 9.827522479072369e-06, "loss": 1.1776, "step": 1445 }, { "epoch": 0.11147086031452359, "grad_norm": 4.275304794311523, "learning_rate": 9.82719724992683e-06, "loss": 1.2849, "step": 1446 }, { "epoch": 0.11154794942954055, "grad_norm": 4.336565971374512, "learning_rate": 9.826871719830542e-06, "loss": 1.1965, "step": 1447 }, { "epoch": 0.1116250385445575, "grad_norm": 4.239962577819824, "learning_rate": 9.826545888803802e-06, "loss": 1.229, "step": 1448 }, { "epoch": 0.11170212765957446, "grad_norm": 4.397287368774414, "learning_rate": 9.826219756866923e-06, "loss": 1.2252, "step": 1449 }, { "epoch": 0.11177921677459143, "grad_norm": 4.538805961608887, "learning_rate": 9.82589332404024e-06, "loss": 1.2506, "step": 1450 }, { "epoch": 0.11185630588960839, "grad_norm": 3.814425468444824, "learning_rate": 9.825566590344098e-06, "loss": 1.1343, "step": 1451 }, { "epoch": 0.11193339500462535, "grad_norm": 3.6596169471740723, "learning_rate": 9.825239555798875e-06, "loss": 1.2412, "step": 1452 }, { "epoch": 0.1120104841196423, "grad_norm": 4.104322910308838, "learning_rate": 9.824912220424953e-06, "loss": 1.1035, "step": 1453 }, { "epoch": 0.11208757323465926, "grad_norm": 3.8343546390533447, "learning_rate": 9.824584584242746e-06, "loss": 1.1575, "step": 1454 }, { "epoch": 0.11216466234967623, "grad_norm": 3.6301510334014893, "learning_rate": 9.824256647272676e-06, "loss": 1.2006, "step": 1455 }, { "epoch": 0.11224175146469319, "grad_norm": 4.543994903564453, "learning_rate": 9.823928409535191e-06, "loss": 1.2539, "step": 1456 }, { "epoch": 0.11231884057971014, "grad_norm": 4.104208469390869, "learning_rate": 9.82359987105075e-06, "loss": 1.2179, "step": 1457 }, { "epoch": 0.1123959296947271, "grad_norm": 4.09733772277832, "learning_rate": 9.823271031839843e-06, "loss": 1.1579, "step": 1458 }, { "epoch": 0.11247301880974406, "grad_norm": 4.35043478012085, "learning_rate": 9.822941891922965e-06, "loss": 1.2598, "step": 1459 }, { "epoch": 0.11255010792476103, "grad_norm": 4.023888111114502, "learning_rate": 9.82261245132064e-06, "loss": 1.1077, "step": 1460 }, { "epoch": 0.11262719703977798, "grad_norm": 3.594707489013672, "learning_rate": 9.822282710053403e-06, "loss": 1.1578, "step": 1461 }, { "epoch": 0.11270428615479494, "grad_norm": 4.166836261749268, "learning_rate": 9.821952668141817e-06, "loss": 1.1718, "step": 1462 }, { "epoch": 0.1127813752698119, "grad_norm": 3.9749162197113037, "learning_rate": 9.821622325606454e-06, "loss": 1.1749, "step": 1463 }, { "epoch": 0.11285846438482887, "grad_norm": 4.0859150886535645, "learning_rate": 9.821291682467912e-06, "loss": 1.1667, "step": 1464 }, { "epoch": 0.11293555349984583, "grad_norm": 5.696316242218018, "learning_rate": 9.820960738746805e-06, "loss": 1.0963, "step": 1465 }, { "epoch": 0.11301264261486278, "grad_norm": 4.3305559158325195, "learning_rate": 9.820629494463763e-06, "loss": 1.1439, "step": 1466 }, { "epoch": 0.11308973172987974, "grad_norm": 4.143655300140381, "learning_rate": 9.820297949639439e-06, 
"loss": 1.0771, "step": 1467 }, { "epoch": 0.1131668208448967, "grad_norm": 4.505462169647217, "learning_rate": 9.8199661042945e-06, "loss": 1.2824, "step": 1468 }, { "epoch": 0.11324390995991367, "grad_norm": 3.8366472721099854, "learning_rate": 9.819633958449642e-06, "loss": 1.1783, "step": 1469 }, { "epoch": 0.11332099907493062, "grad_norm": 4.8427910804748535, "learning_rate": 9.819301512125565e-06, "loss": 1.3027, "step": 1470 }, { "epoch": 0.11339808818994758, "grad_norm": 4.196298599243164, "learning_rate": 9.818968765343e-06, "loss": 1.1545, "step": 1471 }, { "epoch": 0.11347517730496454, "grad_norm": 4.095616817474365, "learning_rate": 9.818635718122692e-06, "loss": 1.1564, "step": 1472 }, { "epoch": 0.11355226641998149, "grad_norm": 4.081837177276611, "learning_rate": 9.8183023704854e-06, "loss": 1.141, "step": 1473 }, { "epoch": 0.11362935553499846, "grad_norm": 4.244540214538574, "learning_rate": 9.817968722451911e-06, "loss": 1.2587, "step": 1474 }, { "epoch": 0.11370644465001542, "grad_norm": 4.162115097045898, "learning_rate": 9.817634774043026e-06, "loss": 1.2686, "step": 1475 }, { "epoch": 0.11378353376503238, "grad_norm": 4.111065864562988, "learning_rate": 9.817300525279562e-06, "loss": 1.1118, "step": 1476 }, { "epoch": 0.11386062288004933, "grad_norm": 4.0052714347839355, "learning_rate": 9.816965976182362e-06, "loss": 1.145, "step": 1477 }, { "epoch": 0.11393771199506629, "grad_norm": 3.8451459407806396, "learning_rate": 9.81663112677228e-06, "loss": 1.0982, "step": 1478 }, { "epoch": 0.11401480111008326, "grad_norm": 4.380990982055664, "learning_rate": 9.816295977070193e-06, "loss": 1.2695, "step": 1479 }, { "epoch": 0.11409189022510022, "grad_norm": 4.092302322387695, "learning_rate": 9.815960527096996e-06, "loss": 1.2088, "step": 1480 }, { "epoch": 0.11416897934011717, "grad_norm": 3.8186841011047363, "learning_rate": 9.815624776873605e-06, "loss": 1.1231, "step": 1481 }, { "epoch": 0.11424606845513413, "grad_norm": 4.1122894287109375, "learning_rate": 9.815288726420949e-06, "loss": 1.1516, "step": 1482 }, { "epoch": 0.1143231575701511, "grad_norm": 4.764173984527588, "learning_rate": 9.814952375759979e-06, "loss": 1.2084, "step": 1483 }, { "epoch": 0.11440024668516806, "grad_norm": 4.217214584350586, "learning_rate": 9.814615724911664e-06, "loss": 1.0574, "step": 1484 }, { "epoch": 0.11447733580018501, "grad_norm": 3.7358455657958984, "learning_rate": 9.814278773896997e-06, "loss": 1.1282, "step": 1485 }, { "epoch": 0.11455442491520197, "grad_norm": 3.7960875034332275, "learning_rate": 9.813941522736981e-06, "loss": 1.1453, "step": 1486 }, { "epoch": 0.11463151403021893, "grad_norm": 4.146248817443848, "learning_rate": 9.813603971452643e-06, "loss": 1.2265, "step": 1487 }, { "epoch": 0.1147086031452359, "grad_norm": 4.388052463531494, "learning_rate": 9.813266120065028e-06, "loss": 1.1369, "step": 1488 }, { "epoch": 0.11478569226025286, "grad_norm": 4.071401119232178, "learning_rate": 9.812927968595199e-06, "loss": 1.1689, "step": 1489 }, { "epoch": 0.11486278137526981, "grad_norm": 4.517001628875732, "learning_rate": 9.812589517064237e-06, "loss": 1.1537, "step": 1490 }, { "epoch": 0.11493987049028677, "grad_norm": 4.021905899047852, "learning_rate": 9.812250765493243e-06, "loss": 1.0266, "step": 1491 }, { "epoch": 0.11501695960530373, "grad_norm": 4.433926105499268, "learning_rate": 9.811911713903339e-06, "loss": 1.108, "step": 1492 }, { "epoch": 0.1150940487203207, "grad_norm": 4.291812419891357, "learning_rate": 9.811572362315661e-06, "loss": 1.2626, "step": 
1493 }, { "epoch": 0.11517113783533765, "grad_norm": 4.35188627243042, "learning_rate": 9.811232710751366e-06, "loss": 1.1318, "step": 1494 }, { "epoch": 0.11524822695035461, "grad_norm": 4.012880325317383, "learning_rate": 9.810892759231629e-06, "loss": 1.1044, "step": 1495 }, { "epoch": 0.11532531606537157, "grad_norm": 4.010427474975586, "learning_rate": 9.810552507777643e-06, "loss": 1.2086, "step": 1496 }, { "epoch": 0.11540240518038852, "grad_norm": 3.74871563911438, "learning_rate": 9.810211956410625e-06, "loss": 1.1279, "step": 1497 }, { "epoch": 0.1154794942954055, "grad_norm": 3.983808755874634, "learning_rate": 9.809871105151805e-06, "loss": 1.1586, "step": 1498 }, { "epoch": 0.11555658341042245, "grad_norm": 4.453268051147461, "learning_rate": 9.80952995402243e-06, "loss": 1.156, "step": 1499 }, { "epoch": 0.11563367252543941, "grad_norm": 4.205508708953857, "learning_rate": 9.809188503043773e-06, "loss": 1.1945, "step": 1500 }, { "epoch": 0.11571076164045636, "grad_norm": 4.098243713378906, "learning_rate": 9.80884675223712e-06, "loss": 1.1781, "step": 1501 }, { "epoch": 0.11578785075547332, "grad_norm": 4.6129937171936035, "learning_rate": 9.808504701623777e-06, "loss": 1.1673, "step": 1502 }, { "epoch": 0.11586493987049029, "grad_norm": 4.1249494552612305, "learning_rate": 9.808162351225073e-06, "loss": 1.1907, "step": 1503 }, { "epoch": 0.11594202898550725, "grad_norm": 3.684288740158081, "learning_rate": 9.807819701062345e-06, "loss": 1.1824, "step": 1504 }, { "epoch": 0.1160191181005242, "grad_norm": 4.17221212387085, "learning_rate": 9.80747675115696e-06, "loss": 1.1458, "step": 1505 }, { "epoch": 0.11609620721554116, "grad_norm": 3.946536064147949, "learning_rate": 9.807133501530297e-06, "loss": 1.1267, "step": 1506 }, { "epoch": 0.11617329633055813, "grad_norm": 4.113000392913818, "learning_rate": 9.806789952203759e-06, "loss": 1.1736, "step": 1507 }, { "epoch": 0.11625038544557509, "grad_norm": 4.112078666687012, "learning_rate": 9.806446103198761e-06, "loss": 1.2042, "step": 1508 }, { "epoch": 0.11632747456059205, "grad_norm": 3.994236946105957, "learning_rate": 9.806101954536741e-06, "loss": 1.2522, "step": 1509 }, { "epoch": 0.116404563675609, "grad_norm": 4.026162624359131, "learning_rate": 9.805757506239157e-06, "loss": 1.1353, "step": 1510 }, { "epoch": 0.11648165279062596, "grad_norm": 4.128935813903809, "learning_rate": 9.805412758327483e-06, "loss": 1.2068, "step": 1511 }, { "epoch": 0.11655874190564293, "grad_norm": 3.9604172706604004, "learning_rate": 9.80506771082321e-06, "loss": 1.1924, "step": 1512 }, { "epoch": 0.11663583102065989, "grad_norm": 6.198178291320801, "learning_rate": 9.804722363747852e-06, "loss": 1.165, "step": 1513 }, { "epoch": 0.11671292013567684, "grad_norm": 4.178301811218262, "learning_rate": 9.804376717122938e-06, "loss": 1.1974, "step": 1514 }, { "epoch": 0.1167900092506938, "grad_norm": 4.095193386077881, "learning_rate": 9.804030770970019e-06, "loss": 1.1372, "step": 1515 }, { "epoch": 0.11686709836571076, "grad_norm": 3.5960633754730225, "learning_rate": 9.803684525310662e-06, "loss": 1.1578, "step": 1516 }, { "epoch": 0.11694418748072773, "grad_norm": 3.994582414627075, "learning_rate": 9.803337980166455e-06, "loss": 1.1687, "step": 1517 }, { "epoch": 0.11702127659574468, "grad_norm": 4.156383991241455, "learning_rate": 9.802991135558998e-06, "loss": 1.1485, "step": 1518 }, { "epoch": 0.11709836571076164, "grad_norm": 3.9778475761413574, "learning_rate": 9.802643991509923e-06, "loss": 1.2049, "step": 1519 }, { "epoch": 
0.1171754548257786, "grad_norm": 4.528823375701904, "learning_rate": 9.802296548040868e-06, "loss": 1.3013, "step": 1520 }, { "epoch": 0.11725254394079555, "grad_norm": 4.259623050689697, "learning_rate": 9.801948805173494e-06, "loss": 1.1648, "step": 1521 }, { "epoch": 0.11732963305581252, "grad_norm": 4.2941460609436035, "learning_rate": 9.801600762929482e-06, "loss": 1.218, "step": 1522 }, { "epoch": 0.11740672217082948, "grad_norm": 4.13802433013916, "learning_rate": 9.801252421330531e-06, "loss": 1.2677, "step": 1523 }, { "epoch": 0.11748381128584644, "grad_norm": 3.9115195274353027, "learning_rate": 9.800903780398357e-06, "loss": 1.0531, "step": 1524 }, { "epoch": 0.1175609004008634, "grad_norm": 4.135284423828125, "learning_rate": 9.800554840154697e-06, "loss": 1.1867, "step": 1525 }, { "epoch": 0.11763798951588036, "grad_norm": 3.7419018745422363, "learning_rate": 9.800205600621307e-06, "loss": 1.1606, "step": 1526 }, { "epoch": 0.11771507863089732, "grad_norm": 4.145817279815674, "learning_rate": 9.799856061819958e-06, "loss": 1.1399, "step": 1527 }, { "epoch": 0.11779216774591428, "grad_norm": 3.8120839595794678, "learning_rate": 9.799506223772442e-06, "loss": 1.1803, "step": 1528 }, { "epoch": 0.11786925686093123, "grad_norm": 4.319042682647705, "learning_rate": 9.79915608650057e-06, "loss": 1.0973, "step": 1529 }, { "epoch": 0.11794634597594819, "grad_norm": 4.340944766998291, "learning_rate": 9.798805650026173e-06, "loss": 1.3635, "step": 1530 }, { "epoch": 0.11802343509096516, "grad_norm": 4.413815975189209, "learning_rate": 9.798454914371096e-06, "loss": 1.1947, "step": 1531 }, { "epoch": 0.11810052420598212, "grad_norm": 3.837949514389038, "learning_rate": 9.798103879557207e-06, "loss": 1.216, "step": 1532 }, { "epoch": 0.11817761332099908, "grad_norm": 4.424009323120117, "learning_rate": 9.797752545606391e-06, "loss": 1.2992, "step": 1533 }, { "epoch": 0.11825470243601603, "grad_norm": 3.9235024452209473, "learning_rate": 9.797400912540553e-06, "loss": 1.1753, "step": 1534 }, { "epoch": 0.11833179155103299, "grad_norm": 3.9856998920440674, "learning_rate": 9.797048980381614e-06, "loss": 1.2046, "step": 1535 }, { "epoch": 0.11840888066604996, "grad_norm": 3.952277898788452, "learning_rate": 9.796696749151515e-06, "loss": 1.1669, "step": 1536 }, { "epoch": 0.11848596978106692, "grad_norm": 3.826277732849121, "learning_rate": 9.796344218872218e-06, "loss": 1.2616, "step": 1537 }, { "epoch": 0.11856305889608387, "grad_norm": 4.182483196258545, "learning_rate": 9.795991389565698e-06, "loss": 1.0979, "step": 1538 }, { "epoch": 0.11864014801110083, "grad_norm": 4.177645206451416, "learning_rate": 9.795638261253955e-06, "loss": 1.2607, "step": 1539 }, { "epoch": 0.11871723712611779, "grad_norm": 3.5428824424743652, "learning_rate": 9.795284833959003e-06, "loss": 1.011, "step": 1540 }, { "epoch": 0.11879432624113476, "grad_norm": 4.085594654083252, "learning_rate": 9.794931107702877e-06, "loss": 1.1675, "step": 1541 }, { "epoch": 0.11887141535615171, "grad_norm": 4.093392848968506, "learning_rate": 9.794577082507631e-06, "loss": 1.2673, "step": 1542 }, { "epoch": 0.11894850447116867, "grad_norm": 4.299323081970215, "learning_rate": 9.794222758395336e-06, "loss": 1.2598, "step": 1543 }, { "epoch": 0.11902559358618563, "grad_norm": 3.8711843490600586, "learning_rate": 9.79386813538808e-06, "loss": 1.1941, "step": 1544 }, { "epoch": 0.11910268270120258, "grad_norm": 4.2434983253479, "learning_rate": 9.793513213507974e-06, "loss": 1.1717, "step": 1545 }, { "epoch": 0.11917977181621955, 
"grad_norm": 4.333231449127197, "learning_rate": 9.793157992777147e-06, "loss": 1.2443, "step": 1546 }, { "epoch": 0.11925686093123651, "grad_norm": 3.959233045578003, "learning_rate": 9.792802473217742e-06, "loss": 1.191, "step": 1547 }, { "epoch": 0.11933395004625347, "grad_norm": 3.931117534637451, "learning_rate": 9.792446654851928e-06, "loss": 1.1582, "step": 1548 }, { "epoch": 0.11941103916127042, "grad_norm": 4.2051777839660645, "learning_rate": 9.792090537701883e-06, "loss": 1.1121, "step": 1549 }, { "epoch": 0.1194881282762874, "grad_norm": 4.133641242980957, "learning_rate": 9.791734121789814e-06, "loss": 1.214, "step": 1550 }, { "epoch": 0.11956521739130435, "grad_norm": 3.797180652618408, "learning_rate": 9.791377407137936e-06, "loss": 1.0736, "step": 1551 }, { "epoch": 0.11964230650632131, "grad_norm": 4.1591477394104, "learning_rate": 9.791020393768495e-06, "loss": 1.2181, "step": 1552 }, { "epoch": 0.11971939562133826, "grad_norm": 4.558741569519043, "learning_rate": 9.790663081703746e-06, "loss": 1.1797, "step": 1553 }, { "epoch": 0.11979648473635522, "grad_norm": 4.007091522216797, "learning_rate": 9.790305470965964e-06, "loss": 1.2147, "step": 1554 }, { "epoch": 0.11987357385137219, "grad_norm": 4.392326831817627, "learning_rate": 9.789947561577445e-06, "loss": 1.2772, "step": 1555 }, { "epoch": 0.11995066296638915, "grad_norm": 3.7429401874542236, "learning_rate": 9.789589353560505e-06, "loss": 1.2085, "step": 1556 }, { "epoch": 0.1200277520814061, "grad_norm": 4.087551116943359, "learning_rate": 9.789230846937473e-06, "loss": 1.1084, "step": 1557 }, { "epoch": 0.12010484119642306, "grad_norm": 4.172464370727539, "learning_rate": 9.788872041730703e-06, "loss": 1.2449, "step": 1558 }, { "epoch": 0.12018193031144002, "grad_norm": 4.280933856964111, "learning_rate": 9.788512937962562e-06, "loss": 1.1364, "step": 1559 }, { "epoch": 0.12025901942645699, "grad_norm": 3.9372894763946533, "learning_rate": 9.788153535655442e-06, "loss": 1.1644, "step": 1560 }, { "epoch": 0.12033610854147395, "grad_norm": 3.8368165493011475, "learning_rate": 9.787793834831745e-06, "loss": 1.138, "step": 1561 }, { "epoch": 0.1204131976564909, "grad_norm": 3.9816088676452637, "learning_rate": 9.787433835513901e-06, "loss": 1.0688, "step": 1562 }, { "epoch": 0.12049028677150786, "grad_norm": 3.9207653999328613, "learning_rate": 9.787073537724351e-06, "loss": 1.1746, "step": 1563 }, { "epoch": 0.12056737588652482, "grad_norm": 4.689396858215332, "learning_rate": 9.78671294148556e-06, "loss": 1.2326, "step": 1564 }, { "epoch": 0.12064446500154179, "grad_norm": 4.082599639892578, "learning_rate": 9.786352046820007e-06, "loss": 1.1073, "step": 1565 }, { "epoch": 0.12072155411655874, "grad_norm": 4.284193515777588, "learning_rate": 9.785990853750193e-06, "loss": 1.2251, "step": 1566 }, { "epoch": 0.1207986432315757, "grad_norm": 3.9901416301727295, "learning_rate": 9.785629362298637e-06, "loss": 1.2174, "step": 1567 }, { "epoch": 0.12087573234659266, "grad_norm": 4.175442695617676, "learning_rate": 9.785267572487876e-06, "loss": 1.1415, "step": 1568 }, { "epoch": 0.12095282146160963, "grad_norm": 4.132479190826416, "learning_rate": 9.784905484340463e-06, "loss": 1.2834, "step": 1569 }, { "epoch": 0.12102991057662658, "grad_norm": 3.794235944747925, "learning_rate": 9.784543097878977e-06, "loss": 1.0903, "step": 1570 }, { "epoch": 0.12110699969164354, "grad_norm": 3.9358041286468506, "learning_rate": 9.784180413126009e-06, "loss": 1.0461, "step": 1571 }, { "epoch": 0.1211840888066605, "grad_norm": 
3.783879041671753, "learning_rate": 9.78381743010417e-06, "loss": 1.2404, "step": 1572 }, { "epoch": 0.12126117792167745, "grad_norm": 4.348257064819336, "learning_rate": 9.783454148836089e-06, "loss": 1.2484, "step": 1573 }, { "epoch": 0.12133826703669442, "grad_norm": 4.235533237457275, "learning_rate": 9.783090569344418e-06, "loss": 1.0986, "step": 1574 }, { "epoch": 0.12141535615171138, "grad_norm": 3.815244197845459, "learning_rate": 9.782726691651819e-06, "loss": 0.9998, "step": 1575 }, { "epoch": 0.12149244526672834, "grad_norm": 3.847841739654541, "learning_rate": 9.782362515780983e-06, "loss": 1.1051, "step": 1576 }, { "epoch": 0.1215695343817453, "grad_norm": 4.120035648345947, "learning_rate": 9.781998041754613e-06, "loss": 1.1412, "step": 1577 }, { "epoch": 0.12164662349676225, "grad_norm": 4.0854105949401855, "learning_rate": 9.781633269595432e-06, "loss": 1.1599, "step": 1578 }, { "epoch": 0.12172371261177922, "grad_norm": 4.094209671020508, "learning_rate": 9.78126819932618e-06, "loss": 1.2077, "step": 1579 }, { "epoch": 0.12180080172679618, "grad_norm": 4.1311516761779785, "learning_rate": 9.78090283096962e-06, "loss": 1.1408, "step": 1580 }, { "epoch": 0.12187789084181314, "grad_norm": 3.8271164894104004, "learning_rate": 9.780537164548529e-06, "loss": 1.2063, "step": 1581 }, { "epoch": 0.12195497995683009, "grad_norm": 3.9459309577941895, "learning_rate": 9.780171200085703e-06, "loss": 1.1439, "step": 1582 }, { "epoch": 0.12203206907184705, "grad_norm": 4.513016223907471, "learning_rate": 9.779804937603963e-06, "loss": 1.2068, "step": 1583 }, { "epoch": 0.12210915818686402, "grad_norm": 4.303330421447754, "learning_rate": 9.779438377126138e-06, "loss": 1.2965, "step": 1584 }, { "epoch": 0.12218624730188098, "grad_norm": 4.000280380249023, "learning_rate": 9.779071518675086e-06, "loss": 1.1383, "step": 1585 }, { "epoch": 0.12226333641689793, "grad_norm": 3.8880882263183594, "learning_rate": 9.778704362273673e-06, "loss": 1.0986, "step": 1586 }, { "epoch": 0.12234042553191489, "grad_norm": 4.341008186340332, "learning_rate": 9.778336907944793e-06, "loss": 1.1779, "step": 1587 }, { "epoch": 0.12241751464693185, "grad_norm": 4.198411464691162, "learning_rate": 9.777969155711356e-06, "loss": 1.1633, "step": 1588 }, { "epoch": 0.12249460376194882, "grad_norm": 3.935933828353882, "learning_rate": 9.777601105596288e-06, "loss": 1.1661, "step": 1589 }, { "epoch": 0.12257169287696577, "grad_norm": 3.575408935546875, "learning_rate": 9.777232757622534e-06, "loss": 1.037, "step": 1590 }, { "epoch": 0.12264878199198273, "grad_norm": 4.11090087890625, "learning_rate": 9.77686411181306e-06, "loss": 1.2121, "step": 1591 }, { "epoch": 0.12272587110699969, "grad_norm": 4.116640090942383, "learning_rate": 9.77649516819085e-06, "loss": 1.0669, "step": 1592 }, { "epoch": 0.12280296022201666, "grad_norm": 3.7206943035125732, "learning_rate": 9.776125926778902e-06, "loss": 1.0977, "step": 1593 }, { "epoch": 0.12288004933703361, "grad_norm": 3.60390305519104, "learning_rate": 9.775756387600239e-06, "loss": 1.0635, "step": 1594 }, { "epoch": 0.12295713845205057, "grad_norm": 4.16797399520874, "learning_rate": 9.7753865506779e-06, "loss": 1.1748, "step": 1595 }, { "epoch": 0.12303422756706753, "grad_norm": 3.8636176586151123, "learning_rate": 9.77501641603494e-06, "loss": 1.1328, "step": 1596 }, { "epoch": 0.12311131668208448, "grad_norm": 4.086244583129883, "learning_rate": 9.77464598369444e-06, "loss": 1.2368, "step": 1597 }, { "epoch": 0.12318840579710146, "grad_norm": 3.9947853088378906, 
"learning_rate": 9.77427525367949e-06, "loss": 1.0748, "step": 1598 }, { "epoch": 0.12326549491211841, "grad_norm": 3.5423076152801514, "learning_rate": 9.773904226013207e-06, "loss": 1.1288, "step": 1599 }, { "epoch": 0.12334258402713537, "grad_norm": 4.289074897766113, "learning_rate": 9.773532900718717e-06, "loss": 1.2039, "step": 1600 }, { "epoch": 0.12341967314215233, "grad_norm": 4.612265586853027, "learning_rate": 9.773161277819175e-06, "loss": 1.2929, "step": 1601 }, { "epoch": 0.12349676225716928, "grad_norm": 4.081154823303223, "learning_rate": 9.772789357337746e-06, "loss": 1.221, "step": 1602 }, { "epoch": 0.12357385137218625, "grad_norm": 4.336784839630127, "learning_rate": 9.772417139297622e-06, "loss": 1.2045, "step": 1603 }, { "epoch": 0.12365094048720321, "grad_norm": 4.129827499389648, "learning_rate": 9.772044623722005e-06, "loss": 1.1447, "step": 1604 }, { "epoch": 0.12372802960222017, "grad_norm": 4.177978515625, "learning_rate": 9.771671810634123e-06, "loss": 1.1951, "step": 1605 }, { "epoch": 0.12380511871723712, "grad_norm": 3.896273374557495, "learning_rate": 9.771298700057214e-06, "loss": 1.1928, "step": 1606 }, { "epoch": 0.12388220783225408, "grad_norm": 3.946012258529663, "learning_rate": 9.770925292014542e-06, "loss": 1.1168, "step": 1607 }, { "epoch": 0.12395929694727105, "grad_norm": 4.167520046234131, "learning_rate": 9.77055158652939e-06, "loss": 1.3042, "step": 1608 }, { "epoch": 0.124036386062288, "grad_norm": 4.672422885894775, "learning_rate": 9.77017758362505e-06, "loss": 1.2575, "step": 1609 }, { "epoch": 0.12411347517730496, "grad_norm": 4.5983123779296875, "learning_rate": 9.769803283324847e-06, "loss": 1.1992, "step": 1610 }, { "epoch": 0.12419056429232192, "grad_norm": 4.06178092956543, "learning_rate": 9.769428685652112e-06, "loss": 1.2428, "step": 1611 }, { "epoch": 0.12426765340733889, "grad_norm": 4.024518013000488, "learning_rate": 9.769053790630198e-06, "loss": 1.1464, "step": 1612 }, { "epoch": 0.12434474252235585, "grad_norm": 4.302224636077881, "learning_rate": 9.768678598282481e-06, "loss": 1.0665, "step": 1613 }, { "epoch": 0.1244218316373728, "grad_norm": 4.330562114715576, "learning_rate": 9.76830310863235e-06, "loss": 1.1544, "step": 1614 }, { "epoch": 0.12449892075238976, "grad_norm": 4.35994291305542, "learning_rate": 9.767927321703217e-06, "loss": 1.1948, "step": 1615 }, { "epoch": 0.12457600986740672, "grad_norm": 3.8011748790740967, "learning_rate": 9.767551237518508e-06, "loss": 1.155, "step": 1616 }, { "epoch": 0.12465309898242369, "grad_norm": 3.746467113494873, "learning_rate": 9.767174856101672e-06, "loss": 1.1757, "step": 1617 }, { "epoch": 0.12473018809744064, "grad_norm": 3.849261522293091, "learning_rate": 9.766798177476175e-06, "loss": 0.9983, "step": 1618 }, { "epoch": 0.1248072772124576, "grad_norm": 4.078786849975586, "learning_rate": 9.766421201665498e-06, "loss": 1.2175, "step": 1619 }, { "epoch": 0.12488436632747456, "grad_norm": 4.006483554840088, "learning_rate": 9.766043928693146e-06, "loss": 1.2068, "step": 1620 }, { "epoch": 0.12496145544249151, "grad_norm": 4.358697891235352, "learning_rate": 9.765666358582637e-06, "loss": 1.1717, "step": 1621 }, { "epoch": 0.12503854455750849, "grad_norm": 4.190584182739258, "learning_rate": 9.765288491357514e-06, "loss": 1.1927, "step": 1622 }, { "epoch": 0.12511563367252543, "grad_norm": 4.142174243927002, "learning_rate": 9.764910327041333e-06, "loss": 1.1538, "step": 1623 }, { "epoch": 0.1251927227875424, "grad_norm": 3.6901378631591797, "learning_rate": 
9.764531865657671e-06, "loss": 1.1287, "step": 1624 }, { "epoch": 0.12526981190255937, "grad_norm": 3.891272783279419, "learning_rate": 9.764153107230124e-06, "loss": 1.084, "step": 1625 }, { "epoch": 0.1253469010175763, "grad_norm": 4.083333969116211, "learning_rate": 9.763774051782306e-06, "loss": 1.1683, "step": 1626 }, { "epoch": 0.12542399013259328, "grad_norm": 3.742480516433716, "learning_rate": 9.763394699337846e-06, "loss": 1.1251, "step": 1627 }, { "epoch": 0.12550107924761023, "grad_norm": 4.092676639556885, "learning_rate": 9.763015049920397e-06, "loss": 1.1928, "step": 1628 }, { "epoch": 0.1255781683626272, "grad_norm": 3.6800365447998047, "learning_rate": 9.76263510355363e-06, "loss": 1.1753, "step": 1629 }, { "epoch": 0.12565525747764417, "grad_norm": 4.610281944274902, "learning_rate": 9.762254860261229e-06, "loss": 1.2049, "step": 1630 }, { "epoch": 0.1257323465926611, "grad_norm": 3.89145827293396, "learning_rate": 9.761874320066903e-06, "loss": 0.9942, "step": 1631 }, { "epoch": 0.12580943570767808, "grad_norm": 3.8079841136932373, "learning_rate": 9.761493482994374e-06, "loss": 1.2347, "step": 1632 }, { "epoch": 0.12588652482269502, "grad_norm": 4.090712547302246, "learning_rate": 9.761112349067387e-06, "loss": 1.1586, "step": 1633 }, { "epoch": 0.125963613937712, "grad_norm": 3.8824753761291504, "learning_rate": 9.760730918309702e-06, "loss": 1.1221, "step": 1634 }, { "epoch": 0.12604070305272896, "grad_norm": 3.8684356212615967, "learning_rate": 9.760349190745104e-06, "loss": 1.0676, "step": 1635 }, { "epoch": 0.1261177921677459, "grad_norm": 4.076964378356934, "learning_rate": 9.759967166397386e-06, "loss": 1.1901, "step": 1636 }, { "epoch": 0.12619488128276288, "grad_norm": 3.883690357208252, "learning_rate": 9.759584845290368e-06, "loss": 1.179, "step": 1637 }, { "epoch": 0.12627197039777982, "grad_norm": 3.9495391845703125, "learning_rate": 9.759202227447888e-06, "loss": 1.2468, "step": 1638 }, { "epoch": 0.1263490595127968, "grad_norm": 4.153524398803711, "learning_rate": 9.758819312893795e-06, "loss": 1.0873, "step": 1639 }, { "epoch": 0.12642614862781376, "grad_norm": 3.8686201572418213, "learning_rate": 9.758436101651965e-06, "loss": 1.0967, "step": 1640 }, { "epoch": 0.1265032377428307, "grad_norm": 4.673964977264404, "learning_rate": 9.758052593746287e-06, "loss": 1.2438, "step": 1641 }, { "epoch": 0.12658032685784767, "grad_norm": 4.521989345550537, "learning_rate": 9.757668789200676e-06, "loss": 1.2298, "step": 1642 }, { "epoch": 0.12665741597286465, "grad_norm": 4.26433801651001, "learning_rate": 9.757284688039054e-06, "loss": 1.1951, "step": 1643 }, { "epoch": 0.1267345050878816, "grad_norm": 3.7774715423583984, "learning_rate": 9.75690029028537e-06, "loss": 1.2376, "step": 1644 }, { "epoch": 0.12681159420289856, "grad_norm": 4.080611228942871, "learning_rate": 9.756515595963591e-06, "loss": 1.1003, "step": 1645 }, { "epoch": 0.1268886833179155, "grad_norm": 4.495629787445068, "learning_rate": 9.7561306050977e-06, "loss": 1.1129, "step": 1646 }, { "epoch": 0.12696577243293247, "grad_norm": 4.433581352233887, "learning_rate": 9.755745317711696e-06, "loss": 1.2694, "step": 1647 }, { "epoch": 0.12704286154794944, "grad_norm": 4.003334045410156, "learning_rate": 9.755359733829604e-06, "loss": 1.1851, "step": 1648 }, { "epoch": 0.12711995066296639, "grad_norm": 4.140914440155029, "learning_rate": 9.75497385347546e-06, "loss": 1.1482, "step": 1649 }, { "epoch": 0.12719703977798336, "grad_norm": 4.328746795654297, "learning_rate": 9.754587676673323e-06, "loss": 
1.2779, "step": 1650 }, { "epoch": 0.1272741288930003, "grad_norm": 4.75211238861084, "learning_rate": 9.754201203447268e-06, "loss": 1.2699, "step": 1651 }, { "epoch": 0.12735121800801727, "grad_norm": 3.97086763381958, "learning_rate": 9.753814433821393e-06, "loss": 1.189, "step": 1652 }, { "epoch": 0.12742830712303424, "grad_norm": 4.095003604888916, "learning_rate": 9.753427367819808e-06, "loss": 1.1897, "step": 1653 }, { "epoch": 0.12750539623805118, "grad_norm": 4.253332614898682, "learning_rate": 9.753040005466645e-06, "loss": 1.1711, "step": 1654 }, { "epoch": 0.12758248535306815, "grad_norm": 3.9506523609161377, "learning_rate": 9.752652346786054e-06, "loss": 1.2071, "step": 1655 }, { "epoch": 0.1276595744680851, "grad_norm": 4.006979465484619, "learning_rate": 9.752264391802203e-06, "loss": 1.1794, "step": 1656 }, { "epoch": 0.12773666358310207, "grad_norm": 4.240119457244873, "learning_rate": 9.75187614053928e-06, "loss": 1.1955, "step": 1657 }, { "epoch": 0.12781375269811904, "grad_norm": 4.19885778427124, "learning_rate": 9.751487593021491e-06, "loss": 1.1181, "step": 1658 }, { "epoch": 0.12789084181313598, "grad_norm": 3.8711414337158203, "learning_rate": 9.75109874927306e-06, "loss": 1.0409, "step": 1659 }, { "epoch": 0.12796793092815295, "grad_norm": 4.2193756103515625, "learning_rate": 9.750709609318227e-06, "loss": 1.255, "step": 1660 }, { "epoch": 0.1280450200431699, "grad_norm": 4.001354694366455, "learning_rate": 9.750320173181256e-06, "loss": 1.0984, "step": 1661 }, { "epoch": 0.12812210915818686, "grad_norm": 3.9252028465270996, "learning_rate": 9.749930440886424e-06, "loss": 1.0892, "step": 1662 }, { "epoch": 0.12819919827320383, "grad_norm": 4.056916236877441, "learning_rate": 9.749540412458028e-06, "loss": 1.177, "step": 1663 }, { "epoch": 0.12827628738822078, "grad_norm": 4.366199970245361, "learning_rate": 9.749150087920386e-06, "loss": 1.1389, "step": 1664 }, { "epoch": 0.12835337650323775, "grad_norm": 3.8969857692718506, "learning_rate": 9.748759467297835e-06, "loss": 1.1479, "step": 1665 }, { "epoch": 0.1284304656182547, "grad_norm": 4.55438756942749, "learning_rate": 9.748368550614723e-06, "loss": 1.3332, "step": 1666 }, { "epoch": 0.12850755473327166, "grad_norm": 4.168787002563477, "learning_rate": 9.747977337895426e-06, "loss": 1.1065, "step": 1667 }, { "epoch": 0.12858464384828863, "grad_norm": 4.430618762969971, "learning_rate": 9.747585829164332e-06, "loss": 1.176, "step": 1668 }, { "epoch": 0.12866173296330558, "grad_norm": 4.035445690155029, "learning_rate": 9.747194024445851e-06, "loss": 1.2115, "step": 1669 }, { "epoch": 0.12873882207832255, "grad_norm": 3.8563239574432373, "learning_rate": 9.746801923764409e-06, "loss": 1.1879, "step": 1670 }, { "epoch": 0.1288159111933395, "grad_norm": 4.492901802062988, "learning_rate": 9.74640952714445e-06, "loss": 1.2354, "step": 1671 }, { "epoch": 0.12889300030835646, "grad_norm": 3.8549299240112305, "learning_rate": 9.746016834610438e-06, "loss": 1.1373, "step": 1672 }, { "epoch": 0.12897008942337343, "grad_norm": 3.6520705223083496, "learning_rate": 9.745623846186858e-06, "loss": 1.0816, "step": 1673 }, { "epoch": 0.12904717853839037, "grad_norm": 4.377813816070557, "learning_rate": 9.74523056189821e-06, "loss": 1.1365, "step": 1674 }, { "epoch": 0.12912426765340734, "grad_norm": 3.965122699737549, "learning_rate": 9.744836981769013e-06, "loss": 1.0827, "step": 1675 }, { "epoch": 0.12920135676842429, "grad_norm": 4.009072303771973, "learning_rate": 9.744443105823802e-06, "loss": 1.1322, "step": 1676 }, { 
"epoch": 0.12927844588344126, "grad_norm": 4.5139994621276855, "learning_rate": 9.744048934087138e-06, "loss": 1.2437, "step": 1677 }, { "epoch": 0.12935553499845823, "grad_norm": 3.827956199645996, "learning_rate": 9.743654466583591e-06, "loss": 1.1468, "step": 1678 }, { "epoch": 0.12943262411347517, "grad_norm": 3.9646661281585693, "learning_rate": 9.743259703337758e-06, "loss": 1.1022, "step": 1679 }, { "epoch": 0.12950971322849214, "grad_norm": 3.9882500171661377, "learning_rate": 9.742864644374248e-06, "loss": 1.1333, "step": 1680 }, { "epoch": 0.12958680234350908, "grad_norm": 3.8956985473632812, "learning_rate": 9.74246928971769e-06, "loss": 1.2208, "step": 1681 }, { "epoch": 0.12966389145852605, "grad_norm": 4.2606000900268555, "learning_rate": 9.742073639392735e-06, "loss": 1.2494, "step": 1682 }, { "epoch": 0.12974098057354302, "grad_norm": 4.1300530433654785, "learning_rate": 9.741677693424048e-06, "loss": 1.111, "step": 1683 }, { "epoch": 0.12981806968855997, "grad_norm": 3.594024896621704, "learning_rate": 9.741281451836313e-06, "loss": 1.0742, "step": 1684 }, { "epoch": 0.12989515880357694, "grad_norm": 4.158742427825928, "learning_rate": 9.740884914654237e-06, "loss": 1.0348, "step": 1685 }, { "epoch": 0.1299722479185939, "grad_norm": 3.8110404014587402, "learning_rate": 9.74048808190254e-06, "loss": 1.1757, "step": 1686 }, { "epoch": 0.13004933703361085, "grad_norm": 3.863544464111328, "learning_rate": 9.740090953605963e-06, "loss": 1.0151, "step": 1687 }, { "epoch": 0.13012642614862782, "grad_norm": 4.125679016113281, "learning_rate": 9.739693529789264e-06, "loss": 1.1922, "step": 1688 }, { "epoch": 0.13020351526364476, "grad_norm": 4.0904083251953125, "learning_rate": 9.73929581047722e-06, "loss": 1.0156, "step": 1689 }, { "epoch": 0.13028060437866174, "grad_norm": 4.615362644195557, "learning_rate": 9.73889779569463e-06, "loss": 1.2547, "step": 1690 }, { "epoch": 0.1303576934936787, "grad_norm": 3.9230780601501465, "learning_rate": 9.738499485466304e-06, "loss": 1.1324, "step": 1691 }, { "epoch": 0.13043478260869565, "grad_norm": 4.175113201141357, "learning_rate": 9.738100879817077e-06, "loss": 1.1832, "step": 1692 }, { "epoch": 0.13051187172371262, "grad_norm": 3.8793387413024902, "learning_rate": 9.7377019787718e-06, "loss": 1.0902, "step": 1693 }, { "epoch": 0.13058896083872956, "grad_norm": 4.145679950714111, "learning_rate": 9.73730278235534e-06, "loss": 1.1542, "step": 1694 }, { "epoch": 0.13066604995374653, "grad_norm": 4.0476179122924805, "learning_rate": 9.736903290592589e-06, "loss": 1.2334, "step": 1695 }, { "epoch": 0.1307431390687635, "grad_norm": 3.9774255752563477, "learning_rate": 9.73650350350845e-06, "loss": 1.1498, "step": 1696 }, { "epoch": 0.13082022818378045, "grad_norm": 3.953183174133301, "learning_rate": 9.736103421127847e-06, "loss": 1.1513, "step": 1697 }, { "epoch": 0.13089731729879742, "grad_norm": 4.064455986022949, "learning_rate": 9.735703043475727e-06, "loss": 1.2054, "step": 1698 }, { "epoch": 0.13097440641381436, "grad_norm": 4.00160026550293, "learning_rate": 9.735302370577049e-06, "loss": 1.13, "step": 1699 }, { "epoch": 0.13105149552883133, "grad_norm": 4.219839096069336, "learning_rate": 9.734901402456792e-06, "loss": 1.1448, "step": 1700 }, { "epoch": 0.1311285846438483, "grad_norm": 4.194035053253174, "learning_rate": 9.734500139139952e-06, "loss": 1.1548, "step": 1701 }, { "epoch": 0.13120567375886524, "grad_norm": 3.898303985595703, "learning_rate": 9.734098580651555e-06, "loss": 1.119, "step": 1702 }, { "epoch": 
0.13128276287388221, "grad_norm": 4.194339752197266, "learning_rate": 9.733696727016626e-06, "loss": 1.1971, "step": 1703 }, { "epoch": 0.13135985198889916, "grad_norm": 4.266892433166504, "learning_rate": 9.733294578260224e-06, "loss": 1.1156, "step": 1704 }, { "epoch": 0.13143694110391613, "grad_norm": 3.6440038681030273, "learning_rate": 9.73289213440742e-06, "loss": 1.1899, "step": 1705 }, { "epoch": 0.1315140302189331, "grad_norm": 4.273855686187744, "learning_rate": 9.7324893954833e-06, "loss": 1.1433, "step": 1706 }, { "epoch": 0.13159111933395004, "grad_norm": 3.3972716331481934, "learning_rate": 9.73208636151298e-06, "loss": 1.0053, "step": 1707 }, { "epoch": 0.131668208448967, "grad_norm": 3.7814152240753174, "learning_rate": 9.731683032521583e-06, "loss": 1.16, "step": 1708 }, { "epoch": 0.13174529756398395, "grad_norm": 4.149326324462891, "learning_rate": 9.731279408534255e-06, "loss": 1.1135, "step": 1709 }, { "epoch": 0.13182238667900092, "grad_norm": 4.2798051834106445, "learning_rate": 9.73087548957616e-06, "loss": 1.2493, "step": 1710 }, { "epoch": 0.1318994757940179, "grad_norm": 3.8137776851654053, "learning_rate": 9.730471275672478e-06, "loss": 1.1796, "step": 1711 }, { "epoch": 0.13197656490903484, "grad_norm": 4.015539646148682, "learning_rate": 9.730066766848415e-06, "loss": 1.1601, "step": 1712 }, { "epoch": 0.1320536540240518, "grad_norm": 3.887502431869507, "learning_rate": 9.729661963129183e-06, "loss": 1.1712, "step": 1713 }, { "epoch": 0.13213074313906875, "grad_norm": 4.104816436767578, "learning_rate": 9.729256864540025e-06, "loss": 1.1473, "step": 1714 }, { "epoch": 0.13220783225408572, "grad_norm": 3.9987316131591797, "learning_rate": 9.728851471106195e-06, "loss": 1.223, "step": 1715 }, { "epoch": 0.1322849213691027, "grad_norm": 4.665685176849365, "learning_rate": 9.728445782852967e-06, "loss": 1.1008, "step": 1716 }, { "epoch": 0.13236201048411964, "grad_norm": 4.16191291809082, "learning_rate": 9.728039799805635e-06, "loss": 1.0905, "step": 1717 }, { "epoch": 0.1324390995991366, "grad_norm": 4.354928016662598, "learning_rate": 9.727633521989505e-06, "loss": 1.1924, "step": 1718 }, { "epoch": 0.13251618871415355, "grad_norm": 4.347275733947754, "learning_rate": 9.727226949429913e-06, "loss": 1.169, "step": 1719 }, { "epoch": 0.13259327782917052, "grad_norm": 4.061772346496582, "learning_rate": 9.726820082152204e-06, "loss": 1.2223, "step": 1720 }, { "epoch": 0.1326703669441875, "grad_norm": 4.027084827423096, "learning_rate": 9.726412920181742e-06, "loss": 1.1924, "step": 1721 }, { "epoch": 0.13274745605920443, "grad_norm": 4.302231788635254, "learning_rate": 9.726005463543913e-06, "loss": 1.1269, "step": 1722 }, { "epoch": 0.1328245451742214, "grad_norm": 3.7703239917755127, "learning_rate": 9.725597712264123e-06, "loss": 1.1173, "step": 1723 }, { "epoch": 0.13290163428923835, "grad_norm": 3.777994155883789, "learning_rate": 9.725189666367787e-06, "loss": 1.0722, "step": 1724 }, { "epoch": 0.13297872340425532, "grad_norm": 3.945777177810669, "learning_rate": 9.724781325880348e-06, "loss": 1.0914, "step": 1725 }, { "epoch": 0.1330558125192723, "grad_norm": 4.009458065032959, "learning_rate": 9.724372690827264e-06, "loss": 1.0405, "step": 1726 }, { "epoch": 0.13313290163428923, "grad_norm": 4.944936752319336, "learning_rate": 9.723963761234013e-06, "loss": 1.2138, "step": 1727 }, { "epoch": 0.1332099907493062, "grad_norm": 3.801499605178833, "learning_rate": 9.723554537126084e-06, "loss": 1.1508, "step": 1728 }, { "epoch": 0.13328707986432317, 
"grad_norm": 3.6166648864746094, "learning_rate": 9.723145018528995e-06, "loss": 1.1144, "step": 1729 }, { "epoch": 0.13336416897934011, "grad_norm": 3.8566949367523193, "learning_rate": 9.722735205468277e-06, "loss": 1.0411, "step": 1730 }, { "epoch": 0.13344125809435708, "grad_norm": 3.865793466567993, "learning_rate": 9.722325097969477e-06, "loss": 1.153, "step": 1731 }, { "epoch": 0.13351834720937403, "grad_norm": 4.346949577331543, "learning_rate": 9.721914696058165e-06, "loss": 1.2115, "step": 1732 }, { "epoch": 0.133595436324391, "grad_norm": 3.8929085731506348, "learning_rate": 9.721503999759926e-06, "loss": 1.1708, "step": 1733 }, { "epoch": 0.13367252543940797, "grad_norm": 4.4507575035095215, "learning_rate": 9.721093009100368e-06, "loss": 1.2546, "step": 1734 }, { "epoch": 0.1337496145544249, "grad_norm": 4.165073394775391, "learning_rate": 9.720681724105112e-06, "loss": 1.1604, "step": 1735 }, { "epoch": 0.13382670366944188, "grad_norm": 3.6533870697021484, "learning_rate": 9.7202701447998e-06, "loss": 1.0856, "step": 1736 }, { "epoch": 0.13390379278445882, "grad_norm": 3.7656784057617188, "learning_rate": 9.71985827121009e-06, "loss": 1.1357, "step": 1737 }, { "epoch": 0.1339808818994758, "grad_norm": 4.213956356048584, "learning_rate": 9.719446103361662e-06, "loss": 1.1676, "step": 1738 }, { "epoch": 0.13405797101449277, "grad_norm": 4.172744274139404, "learning_rate": 9.719033641280211e-06, "loss": 1.2312, "step": 1739 }, { "epoch": 0.1341350601295097, "grad_norm": 4.124050140380859, "learning_rate": 9.718620884991455e-06, "loss": 1.1368, "step": 1740 }, { "epoch": 0.13421214924452668, "grad_norm": 4.168154716491699, "learning_rate": 9.718207834521124e-06, "loss": 1.0489, "step": 1741 }, { "epoch": 0.13428923835954362, "grad_norm": 4.139477252960205, "learning_rate": 9.71779448989497e-06, "loss": 1.0745, "step": 1742 }, { "epoch": 0.1343663274745606, "grad_norm": 4.274909496307373, "learning_rate": 9.717380851138765e-06, "loss": 1.0202, "step": 1743 }, { "epoch": 0.13444341658957756, "grad_norm": 3.696258306503296, "learning_rate": 9.716966918278295e-06, "loss": 1.1022, "step": 1744 }, { "epoch": 0.1345205057045945, "grad_norm": 3.8992645740509033, "learning_rate": 9.716552691339369e-06, "loss": 1.0238, "step": 1745 }, { "epoch": 0.13459759481961148, "grad_norm": 4.183124542236328, "learning_rate": 9.716138170347808e-06, "loss": 1.1277, "step": 1746 }, { "epoch": 0.13467468393462842, "grad_norm": 3.98201584815979, "learning_rate": 9.71572335532946e-06, "loss": 1.2144, "step": 1747 }, { "epoch": 0.1347517730496454, "grad_norm": 4.045650005340576, "learning_rate": 9.715308246310181e-06, "loss": 1.1166, "step": 1748 }, { "epoch": 0.13482886216466236, "grad_norm": 4.4853973388671875, "learning_rate": 9.714892843315857e-06, "loss": 1.1546, "step": 1749 }, { "epoch": 0.1349059512796793, "grad_norm": 4.153515815734863, "learning_rate": 9.714477146372383e-06, "loss": 1.0463, "step": 1750 }, { "epoch": 0.13498304039469627, "grad_norm": 4.120546817779541, "learning_rate": 9.714061155505673e-06, "loss": 1.1675, "step": 1751 }, { "epoch": 0.13506012950971322, "grad_norm": 4.298232555389404, "learning_rate": 9.713644870741669e-06, "loss": 1.1224, "step": 1752 }, { "epoch": 0.1351372186247302, "grad_norm": 3.9219980239868164, "learning_rate": 9.713228292106319e-06, "loss": 1.1257, "step": 1753 }, { "epoch": 0.13521430773974716, "grad_norm": 3.6702487468719482, "learning_rate": 9.712811419625592e-06, "loss": 1.1176, "step": 1754 }, { "epoch": 0.1352913968547641, "grad_norm": 
4.173069953918457, "learning_rate": 9.712394253325483e-06, "loss": 1.07, "step": 1755 }, { "epoch": 0.13536848596978107, "grad_norm": 4.047333240509033, "learning_rate": 9.711976793232e-06, "loss": 1.1868, "step": 1756 }, { "epoch": 0.13544557508479801, "grad_norm": 4.153733730316162, "learning_rate": 9.711559039371165e-06, "loss": 1.1111, "step": 1757 }, { "epoch": 0.13552266419981499, "grad_norm": 4.564488410949707, "learning_rate": 9.711140991769028e-06, "loss": 1.1292, "step": 1758 }, { "epoch": 0.13559975331483196, "grad_norm": 4.647412300109863, "learning_rate": 9.710722650451649e-06, "loss": 1.1204, "step": 1759 }, { "epoch": 0.1356768424298489, "grad_norm": 4.287388324737549, "learning_rate": 9.71030401544511e-06, "loss": 1.1814, "step": 1760 }, { "epoch": 0.13575393154486587, "grad_norm": 4.7093024253845215, "learning_rate": 9.709885086775512e-06, "loss": 1.2857, "step": 1761 }, { "epoch": 0.1358310206598828, "grad_norm": 4.541007041931152, "learning_rate": 9.709465864468971e-06, "loss": 1.2471, "step": 1762 }, { "epoch": 0.13590810977489978, "grad_norm": 4.468155860900879, "learning_rate": 9.709046348551626e-06, "loss": 1.1188, "step": 1763 }, { "epoch": 0.13598519888991675, "grad_norm": 4.554619312286377, "learning_rate": 9.708626539049628e-06, "loss": 1.2205, "step": 1764 }, { "epoch": 0.1360622880049337, "grad_norm": 3.8080332279205322, "learning_rate": 9.708206435989152e-06, "loss": 1.0702, "step": 1765 }, { "epoch": 0.13613937711995067, "grad_norm": 4.381828308105469, "learning_rate": 9.707786039396389e-06, "loss": 0.9736, "step": 1766 }, { "epoch": 0.1362164662349676, "grad_norm": 4.196829319000244, "learning_rate": 9.70736534929755e-06, "loss": 1.1242, "step": 1767 }, { "epoch": 0.13629355534998458, "grad_norm": 4.237636566162109, "learning_rate": 9.70694436571886e-06, "loss": 1.1547, "step": 1768 }, { "epoch": 0.13637064446500155, "grad_norm": 4.274960517883301, "learning_rate": 9.706523088686568e-06, "loss": 1.1723, "step": 1769 }, { "epoch": 0.1364477335800185, "grad_norm": 4.287873268127441, "learning_rate": 9.706101518226939e-06, "loss": 1.1283, "step": 1770 }, { "epoch": 0.13652482269503546, "grad_norm": 4.142662525177002, "learning_rate": 9.705679654366249e-06, "loss": 1.1434, "step": 1771 }, { "epoch": 0.13660191181005243, "grad_norm": 4.222457408905029, "learning_rate": 9.705257497130807e-06, "loss": 1.2198, "step": 1772 }, { "epoch": 0.13667900092506938, "grad_norm": 4.041723251342773, "learning_rate": 9.704835046546928e-06, "loss": 1.0801, "step": 1773 }, { "epoch": 0.13675609004008635, "grad_norm": 3.751447916030884, "learning_rate": 9.704412302640951e-06, "loss": 1.1612, "step": 1774 }, { "epoch": 0.1368331791551033, "grad_norm": 4.4663615226745605, "learning_rate": 9.703989265439233e-06, "loss": 1.0822, "step": 1775 }, { "epoch": 0.13691026827012026, "grad_norm": 3.6068341732025146, "learning_rate": 9.703565934968146e-06, "loss": 1.1349, "step": 1776 }, { "epoch": 0.13698735738513723, "grad_norm": 4.411525726318359, "learning_rate": 9.703142311254083e-06, "loss": 1.1509, "step": 1777 }, { "epoch": 0.13706444650015417, "grad_norm": 4.782855033874512, "learning_rate": 9.702718394323456e-06, "loss": 1.1925, "step": 1778 }, { "epoch": 0.13714153561517115, "grad_norm": 3.9642584323883057, "learning_rate": 9.702294184202692e-06, "loss": 1.1764, "step": 1779 }, { "epoch": 0.1372186247301881, "grad_norm": 3.878993272781372, "learning_rate": 9.70186968091824e-06, "loss": 1.0982, "step": 1780 }, { "epoch": 0.13729571384520506, "grad_norm": 4.065914630889893, 
"learning_rate": 9.701444884496564e-06, "loss": 1.0681, "step": 1781 }, { "epoch": 0.13737280296022203, "grad_norm": 4.171785354614258, "learning_rate": 9.701019794964151e-06, "loss": 1.0463, "step": 1782 }, { "epoch": 0.13744989207523897, "grad_norm": 4.044963359832764, "learning_rate": 9.700594412347499e-06, "loss": 1.1055, "step": 1783 }, { "epoch": 0.13752698119025594, "grad_norm": 4.143056869506836, "learning_rate": 9.700168736673133e-06, "loss": 1.0994, "step": 1784 }, { "epoch": 0.13760407030527289, "grad_norm": 4.355381965637207, "learning_rate": 9.699742767967586e-06, "loss": 1.2089, "step": 1785 }, { "epoch": 0.13768115942028986, "grad_norm": 3.8300368785858154, "learning_rate": 9.699316506257421e-06, "loss": 1.0759, "step": 1786 }, { "epoch": 0.13775824853530683, "grad_norm": 3.7723491191864014, "learning_rate": 9.698889951569208e-06, "loss": 1.0988, "step": 1787 }, { "epoch": 0.13783533765032377, "grad_norm": 3.426234245300293, "learning_rate": 9.698463103929542e-06, "loss": 1.129, "step": 1788 }, { "epoch": 0.13791242676534074, "grad_norm": 3.9809820652008057, "learning_rate": 9.698035963365038e-06, "loss": 1.0853, "step": 1789 }, { "epoch": 0.13798951588035768, "grad_norm": 4.098197937011719, "learning_rate": 9.697608529902321e-06, "loss": 1.2014, "step": 1790 }, { "epoch": 0.13806660499537465, "grad_norm": 4.355566501617432, "learning_rate": 9.697180803568042e-06, "loss": 1.091, "step": 1791 }, { "epoch": 0.13814369411039162, "grad_norm": 3.870091199874878, "learning_rate": 9.69675278438887e-06, "loss": 1.0783, "step": 1792 }, { "epoch": 0.13822078322540857, "grad_norm": 3.829244375228882, "learning_rate": 9.696324472391486e-06, "loss": 1.0885, "step": 1793 }, { "epoch": 0.13829787234042554, "grad_norm": 3.7947609424591064, "learning_rate": 9.695895867602591e-06, "loss": 0.9867, "step": 1794 }, { "epoch": 0.13837496145544248, "grad_norm": 4.241781234741211, "learning_rate": 9.695466970048912e-06, "loss": 1.1875, "step": 1795 }, { "epoch": 0.13845205057045945, "grad_norm": 4.397819995880127, "learning_rate": 9.695037779757185e-06, "loss": 1.2111, "step": 1796 }, { "epoch": 0.13852913968547642, "grad_norm": 4.159852504730225, "learning_rate": 9.694608296754168e-06, "loss": 1.1907, "step": 1797 }, { "epoch": 0.13860622880049336, "grad_norm": 3.971301317214966, "learning_rate": 9.69417852106664e-06, "loss": 1.1413, "step": 1798 }, { "epoch": 0.13868331791551033, "grad_norm": 4.702109336853027, "learning_rate": 9.693748452721392e-06, "loss": 1.046, "step": 1799 }, { "epoch": 0.13876040703052728, "grad_norm": 4.870896339416504, "learning_rate": 9.693318091745237e-06, "loss": 1.2241, "step": 1800 }, { "epoch": 0.13883749614554425, "grad_norm": 3.7580301761627197, "learning_rate": 9.692887438165007e-06, "loss": 1.1777, "step": 1801 }, { "epoch": 0.13891458526056122, "grad_norm": 3.787914276123047, "learning_rate": 9.692456492007548e-06, "loss": 1.1804, "step": 1802 }, { "epoch": 0.13899167437557816, "grad_norm": 3.9445436000823975, "learning_rate": 9.692025253299732e-06, "loss": 1.135, "step": 1803 }, { "epoch": 0.13906876349059513, "grad_norm": 4.596111297607422, "learning_rate": 9.691593722068442e-06, "loss": 1.1305, "step": 1804 }, { "epoch": 0.13914585260561207, "grad_norm": 4.179446220397949, "learning_rate": 9.69116189834058e-06, "loss": 1.1967, "step": 1805 }, { "epoch": 0.13922294172062905, "grad_norm": 3.8127710819244385, "learning_rate": 9.69072978214307e-06, "loss": 1.1141, "step": 1806 }, { "epoch": 0.13930003083564602, "grad_norm": 4.053515434265137, "learning_rate": 
9.690297373502855e-06, "loss": 1.2279, "step": 1807 }, { "epoch": 0.13937711995066296, "grad_norm": 3.7114830017089844, "learning_rate": 9.689864672446887e-06, "loss": 1.0809, "step": 1808 }, { "epoch": 0.13945420906567993, "grad_norm": 3.6122548580169678, "learning_rate": 9.689431679002148e-06, "loss": 1.0588, "step": 1809 }, { "epoch": 0.13953129818069687, "grad_norm": 4.3173604011535645, "learning_rate": 9.68899839319563e-06, "loss": 1.1858, "step": 1810 }, { "epoch": 0.13960838729571384, "grad_norm": 3.7968335151672363, "learning_rate": 9.688564815054349e-06, "loss": 1.1411, "step": 1811 }, { "epoch": 0.1396854764107308, "grad_norm": 4.016263961791992, "learning_rate": 9.688130944605332e-06, "loss": 1.0934, "step": 1812 }, { "epoch": 0.13976256552574776, "grad_norm": 3.948855400085449, "learning_rate": 9.687696781875634e-06, "loss": 1.1486, "step": 1813 }, { "epoch": 0.13983965464076473, "grad_norm": 4.118218898773193, "learning_rate": 9.687262326892317e-06, "loss": 1.1233, "step": 1814 }, { "epoch": 0.1399167437557817, "grad_norm": 4.039265155792236, "learning_rate": 9.68682757968247e-06, "loss": 1.0337, "step": 1815 }, { "epoch": 0.13999383287079864, "grad_norm": 3.6836297512054443, "learning_rate": 9.686392540273198e-06, "loss": 1.1196, "step": 1816 }, { "epoch": 0.1400709219858156, "grad_norm": 4.069727420806885, "learning_rate": 9.685957208691623e-06, "loss": 1.1305, "step": 1817 }, { "epoch": 0.14014801110083255, "grad_norm": 3.9679081439971924, "learning_rate": 9.685521584964885e-06, "loss": 1.1529, "step": 1818 }, { "epoch": 0.14022510021584952, "grad_norm": 3.8460636138916016, "learning_rate": 9.685085669120142e-06, "loss": 1.0946, "step": 1819 }, { "epoch": 0.1403021893308665, "grad_norm": 4.128662586212158, "learning_rate": 9.684649461184574e-06, "loss": 1.2238, "step": 1820 }, { "epoch": 0.14037927844588344, "grad_norm": 4.167182922363281, "learning_rate": 9.684212961185374e-06, "loss": 0.9351, "step": 1821 }, { "epoch": 0.1404563675609004, "grad_norm": 4.262301921844482, "learning_rate": 9.683776169149755e-06, "loss": 1.1101, "step": 1822 }, { "epoch": 0.14053345667591735, "grad_norm": 4.976638317108154, "learning_rate": 9.683339085104952e-06, "loss": 1.1853, "step": 1823 }, { "epoch": 0.14061054579093432, "grad_norm": 4.163305282592773, "learning_rate": 9.68290170907821e-06, "loss": 1.1477, "step": 1824 }, { "epoch": 0.1406876349059513, "grad_norm": 4.624197483062744, "learning_rate": 9.682464041096801e-06, "loss": 1.2981, "step": 1825 }, { "epoch": 0.14076472402096823, "grad_norm": 4.289773464202881, "learning_rate": 9.682026081188009e-06, "loss": 1.1704, "step": 1826 }, { "epoch": 0.1408418131359852, "grad_norm": 5.031392574310303, "learning_rate": 9.681587829379143e-06, "loss": 1.3166, "step": 1827 }, { "epoch": 0.14091890225100215, "grad_norm": 4.640775203704834, "learning_rate": 9.68114928569752e-06, "loss": 1.0848, "step": 1828 }, { "epoch": 0.14099599136601912, "grad_norm": 3.823460817337036, "learning_rate": 9.680710450170482e-06, "loss": 1.1245, "step": 1829 }, { "epoch": 0.1410730804810361, "grad_norm": 4.686859130859375, "learning_rate": 9.680271322825392e-06, "loss": 1.1221, "step": 1830 }, { "epoch": 0.14115016959605303, "grad_norm": 4.298460960388184, "learning_rate": 9.679831903689624e-06, "loss": 1.1457, "step": 1831 }, { "epoch": 0.14122725871107, "grad_norm": 4.24713659286499, "learning_rate": 9.679392192790573e-06, "loss": 1.1776, "step": 1832 }, { "epoch": 0.14130434782608695, "grad_norm": 4.055549621582031, "learning_rate": 9.678952190155655e-06, 
"loss": 1.1615, "step": 1833 }, { "epoch": 0.14138143694110392, "grad_norm": 4.140902042388916, "learning_rate": 9.678511895812301e-06, "loss": 1.0056, "step": 1834 }, { "epoch": 0.1414585260561209, "grad_norm": 4.143174648284912, "learning_rate": 9.678071309787962e-06, "loss": 1.1289, "step": 1835 }, { "epoch": 0.14153561517113783, "grad_norm": 3.9411823749542236, "learning_rate": 9.677630432110103e-06, "loss": 1.1361, "step": 1836 }, { "epoch": 0.1416127042861548, "grad_norm": 4.089599132537842, "learning_rate": 9.677189262806213e-06, "loss": 1.1156, "step": 1837 }, { "epoch": 0.14168979340117174, "grad_norm": 3.4913694858551025, "learning_rate": 9.676747801903798e-06, "loss": 1.0064, "step": 1838 }, { "epoch": 0.1417668825161887, "grad_norm": 3.990382432937622, "learning_rate": 9.676306049430377e-06, "loss": 1.1155, "step": 1839 }, { "epoch": 0.14184397163120568, "grad_norm": 4.364477634429932, "learning_rate": 9.675864005413494e-06, "loss": 1.1699, "step": 1840 }, { "epoch": 0.14192106074622263, "grad_norm": 4.722569942474365, "learning_rate": 9.675421669880707e-06, "loss": 1.1372, "step": 1841 }, { "epoch": 0.1419981498612396, "grad_norm": 4.089046478271484, "learning_rate": 9.674979042859593e-06, "loss": 1.1623, "step": 1842 }, { "epoch": 0.14207523897625654, "grad_norm": 3.667163610458374, "learning_rate": 9.67453612437775e-06, "loss": 1.0287, "step": 1843 }, { "epoch": 0.1421523280912735, "grad_norm": 4.879021167755127, "learning_rate": 9.674092914462788e-06, "loss": 1.2368, "step": 1844 }, { "epoch": 0.14222941720629048, "grad_norm": 3.920139789581299, "learning_rate": 9.67364941314234e-06, "loss": 1.049, "step": 1845 }, { "epoch": 0.14230650632130742, "grad_norm": 3.814567804336548, "learning_rate": 9.673205620444057e-06, "loss": 1.1007, "step": 1846 }, { "epoch": 0.1423835954363244, "grad_norm": 3.913419008255005, "learning_rate": 9.672761536395608e-06, "loss": 1.1205, "step": 1847 }, { "epoch": 0.14246068455134134, "grad_norm": 3.8160576820373535, "learning_rate": 9.672317161024679e-06, "loss": 1.155, "step": 1848 }, { "epoch": 0.1425377736663583, "grad_norm": 3.9836699962615967, "learning_rate": 9.67187249435897e-06, "loss": 1.0829, "step": 1849 }, { "epoch": 0.14261486278137528, "grad_norm": 3.8983733654022217, "learning_rate": 9.67142753642621e-06, "loss": 1.0859, "step": 1850 }, { "epoch": 0.14269195189639222, "grad_norm": 4.181762218475342, "learning_rate": 9.670982287254136e-06, "loss": 1.0088, "step": 1851 }, { "epoch": 0.1427690410114092, "grad_norm": 4.015240669250488, "learning_rate": 9.670536746870507e-06, "loss": 1.1885, "step": 1852 }, { "epoch": 0.14284613012642614, "grad_norm": 4.08405876159668, "learning_rate": 9.670090915303103e-06, "loss": 1.0023, "step": 1853 }, { "epoch": 0.1429232192414431, "grad_norm": 4.5213189125061035, "learning_rate": 9.669644792579717e-06, "loss": 1.2307, "step": 1854 }, { "epoch": 0.14300030835646008, "grad_norm": 4.091536521911621, "learning_rate": 9.669198378728162e-06, "loss": 1.1163, "step": 1855 }, { "epoch": 0.14307739747147702, "grad_norm": 4.354223728179932, "learning_rate": 9.668751673776272e-06, "loss": 1.0047, "step": 1856 }, { "epoch": 0.143154486586494, "grad_norm": 3.7144696712493896, "learning_rate": 9.668304677751894e-06, "loss": 1.1268, "step": 1857 }, { "epoch": 0.14323157570151096, "grad_norm": 4.206509590148926, "learning_rate": 9.667857390682897e-06, "loss": 1.0878, "step": 1858 }, { "epoch": 0.1433086648165279, "grad_norm": 4.0310492515563965, "learning_rate": 9.667409812597168e-06, "loss": 1.135, "step": 1859 
}, { "epoch": 0.14338575393154487, "grad_norm": 4.384598731994629, "learning_rate": 9.66696194352261e-06, "loss": 1.0957, "step": 1860 }, { "epoch": 0.14346284304656182, "grad_norm": 3.8433432579040527, "learning_rate": 9.666513783487145e-06, "loss": 0.9749, "step": 1861 }, { "epoch": 0.1435399321615788, "grad_norm": 3.990506887435913, "learning_rate": 9.666065332518714e-06, "loss": 1.086, "step": 1862 }, { "epoch": 0.14361702127659576, "grad_norm": 3.817073106765747, "learning_rate": 9.665616590645278e-06, "loss": 1.0804, "step": 1863 }, { "epoch": 0.1436941103916127, "grad_norm": 3.8277182579040527, "learning_rate": 9.665167557894808e-06, "loss": 1.0075, "step": 1864 }, { "epoch": 0.14377119950662967, "grad_norm": 3.6581485271453857, "learning_rate": 9.664718234295303e-06, "loss": 1.122, "step": 1865 }, { "epoch": 0.14384828862164661, "grad_norm": 3.7937848567962646, "learning_rate": 9.664268619874776e-06, "loss": 1.0867, "step": 1866 }, { "epoch": 0.14392537773666358, "grad_norm": 4.170339584350586, "learning_rate": 9.663818714661259e-06, "loss": 1.0888, "step": 1867 }, { "epoch": 0.14400246685168056, "grad_norm": 3.6754279136657715, "learning_rate": 9.6633685186828e-06, "loss": 1.0507, "step": 1868 }, { "epoch": 0.1440795559666975, "grad_norm": 3.820255994796753, "learning_rate": 9.662918031967463e-06, "loss": 1.01, "step": 1869 }, { "epoch": 0.14415664508171447, "grad_norm": 4.000062942504883, "learning_rate": 9.662467254543337e-06, "loss": 1.1658, "step": 1870 }, { "epoch": 0.1442337341967314, "grad_norm": 3.684593439102173, "learning_rate": 9.662016186438527e-06, "loss": 1.0732, "step": 1871 }, { "epoch": 0.14431082331174838, "grad_norm": 4.005534648895264, "learning_rate": 9.661564827681152e-06, "loss": 1.0009, "step": 1872 }, { "epoch": 0.14438791242676535, "grad_norm": 3.8382081985473633, "learning_rate": 9.661113178299353e-06, "loss": 1.1442, "step": 1873 }, { "epoch": 0.1444650015417823, "grad_norm": 4.236837863922119, "learning_rate": 9.660661238321289e-06, "loss": 1.0885, "step": 1874 }, { "epoch": 0.14454209065679927, "grad_norm": 3.7837772369384766, "learning_rate": 9.660209007775133e-06, "loss": 1.0377, "step": 1875 }, { "epoch": 0.1446191797718162, "grad_norm": 3.9629247188568115, "learning_rate": 9.659756486689082e-06, "loss": 1.0251, "step": 1876 }, { "epoch": 0.14469626888683318, "grad_norm": 3.9419615268707275, "learning_rate": 9.659303675091348e-06, "loss": 1.181, "step": 1877 }, { "epoch": 0.14477335800185015, "grad_norm": 3.833371162414551, "learning_rate": 9.658850573010162e-06, "loss": 0.9706, "step": 1878 }, { "epoch": 0.1448504471168671, "grad_norm": 4.696406364440918, "learning_rate": 9.658397180473768e-06, "loss": 1.1338, "step": 1879 }, { "epoch": 0.14492753623188406, "grad_norm": 3.9524292945861816, "learning_rate": 9.65794349751044e-06, "loss": 1.032, "step": 1880 }, { "epoch": 0.145004625346901, "grad_norm": 4.391104221343994, "learning_rate": 9.657489524148459e-06, "loss": 1.1298, "step": 1881 }, { "epoch": 0.14508171446191798, "grad_norm": 3.786226749420166, "learning_rate": 9.657035260416126e-06, "loss": 1.0417, "step": 1882 }, { "epoch": 0.14515880357693495, "grad_norm": 3.7811176776885986, "learning_rate": 9.656580706341763e-06, "loss": 1.1276, "step": 1883 }, { "epoch": 0.1452358926919519, "grad_norm": 4.132617473602295, "learning_rate": 9.656125861953711e-06, "loss": 1.0482, "step": 1884 }, { "epoch": 0.14531298180696886, "grad_norm": 4.651538848876953, "learning_rate": 9.655670727280326e-06, "loss": 1.2372, "step": 1885 }, { "epoch": 
0.1453900709219858, "grad_norm": 3.871948719024658, "learning_rate": 9.655215302349986e-06, "loss": 1.0957, "step": 1886 }, { "epoch": 0.14546716003700277, "grad_norm": 5.455825328826904, "learning_rate": 9.65475958719108e-06, "loss": 1.0278, "step": 1887 }, { "epoch": 0.14554424915201974, "grad_norm": 3.909919261932373, "learning_rate": 9.65430358183202e-06, "loss": 1.1265, "step": 1888 }, { "epoch": 0.1456213382670367, "grad_norm": 4.387859344482422, "learning_rate": 9.653847286301238e-06, "loss": 1.1409, "step": 1889 }, { "epoch": 0.14569842738205366, "grad_norm": 4.335275650024414, "learning_rate": 9.65339070062718e-06, "loss": 1.0492, "step": 1890 }, { "epoch": 0.1457755164970706, "grad_norm": 3.7810895442962646, "learning_rate": 9.652933824838315e-06, "loss": 0.9817, "step": 1891 }, { "epoch": 0.14585260561208757, "grad_norm": 4.186948299407959, "learning_rate": 9.652476658963122e-06, "loss": 1.1084, "step": 1892 }, { "epoch": 0.14592969472710454, "grad_norm": 4.576048374176025, "learning_rate": 9.652019203030105e-06, "loss": 1.1037, "step": 1893 }, { "epoch": 0.14600678384212148, "grad_norm": 3.583650827407837, "learning_rate": 9.651561457067785e-06, "loss": 1.045, "step": 1894 }, { "epoch": 0.14608387295713846, "grad_norm": 3.9985146522521973, "learning_rate": 9.6511034211047e-06, "loss": 1.0088, "step": 1895 }, { "epoch": 0.1461609620721554, "grad_norm": 4.34122371673584, "learning_rate": 9.650645095169403e-06, "loss": 1.0772, "step": 1896 }, { "epoch": 0.14623805118717237, "grad_norm": 4.4444122314453125, "learning_rate": 9.650186479290472e-06, "loss": 1.1531, "step": 1897 }, { "epoch": 0.14631514030218934, "grad_norm": 3.7410149574279785, "learning_rate": 9.649727573496499e-06, "loss": 0.9934, "step": 1898 }, { "epoch": 0.14639222941720628, "grad_norm": 3.8100745677948, "learning_rate": 9.649268377816092e-06, "loss": 1.0822, "step": 1899 }, { "epoch": 0.14646931853222325, "grad_norm": 4.091446876525879, "learning_rate": 9.64880889227788e-06, "loss": 1.1054, "step": 1900 }, { "epoch": 0.14654640764724022, "grad_norm": 4.245041847229004, "learning_rate": 9.64834911691051e-06, "loss": 1.1384, "step": 1901 }, { "epoch": 0.14662349676225717, "grad_norm": 4.033145427703857, "learning_rate": 9.647889051742649e-06, "loss": 1.0701, "step": 1902 }, { "epoch": 0.14670058587727414, "grad_norm": 4.123029708862305, "learning_rate": 9.647428696802979e-06, "loss": 1.0922, "step": 1903 }, { "epoch": 0.14677767499229108, "grad_norm": 4.149605751037598, "learning_rate": 9.646968052120196e-06, "loss": 1.074, "step": 1904 }, { "epoch": 0.14685476410730805, "grad_norm": 3.9435675144195557, "learning_rate": 9.646507117723023e-06, "loss": 1.0313, "step": 1905 }, { "epoch": 0.14693185322232502, "grad_norm": 4.518271446228027, "learning_rate": 9.646045893640197e-06, "loss": 1.1398, "step": 1906 }, { "epoch": 0.14700894233734196, "grad_norm": 3.978560447692871, "learning_rate": 9.645584379900473e-06, "loss": 1.1128, "step": 1907 }, { "epoch": 0.14708603145235893, "grad_norm": 4.344027042388916, "learning_rate": 9.64512257653262e-06, "loss": 1.0262, "step": 1908 }, { "epoch": 0.14716312056737588, "grad_norm": 4.042947769165039, "learning_rate": 9.644660483565434e-06, "loss": 1.1174, "step": 1909 }, { "epoch": 0.14724020968239285, "grad_norm": 4.025622844696045, "learning_rate": 9.644198101027721e-06, "loss": 1.0611, "step": 1910 }, { "epoch": 0.14731729879740982, "grad_norm": 4.026158809661865, "learning_rate": 9.64373542894831e-06, "loss": 1.083, "step": 1911 }, { "epoch": 0.14739438791242676, 
"grad_norm": 4.082542419433594, "learning_rate": 9.643272467356048e-06, "loss": 1.132, "step": 1912 }, { "epoch": 0.14747147702744373, "grad_norm": 3.592789888381958, "learning_rate": 9.642809216279793e-06, "loss": 1.019, "step": 1913 }, { "epoch": 0.14754856614246067, "grad_norm": 3.6364972591400146, "learning_rate": 9.64234567574843e-06, "loss": 1.0443, "step": 1914 }, { "epoch": 0.14762565525747764, "grad_norm": 4.323209285736084, "learning_rate": 9.641881845790858e-06, "loss": 1.0813, "step": 1915 }, { "epoch": 0.14770274437249462, "grad_norm": 3.8409078121185303, "learning_rate": 9.641417726435991e-06, "loss": 1.0383, "step": 1916 }, { "epoch": 0.14777983348751156, "grad_norm": 3.611025810241699, "learning_rate": 9.64095331771277e-06, "loss": 1.0272, "step": 1917 }, { "epoch": 0.14785692260252853, "grad_norm": 4.067416667938232, "learning_rate": 9.640488619650145e-06, "loss": 1.1394, "step": 1918 }, { "epoch": 0.14793401171754547, "grad_norm": 3.7739274501800537, "learning_rate": 9.640023632277088e-06, "loss": 1.0281, "step": 1919 }, { "epoch": 0.14801110083256244, "grad_norm": 4.0226263999938965, "learning_rate": 9.639558355622589e-06, "loss": 1.0501, "step": 1920 }, { "epoch": 0.1480881899475794, "grad_norm": 4.34377908706665, "learning_rate": 9.639092789715656e-06, "loss": 1.0817, "step": 1921 }, { "epoch": 0.14816527906259636, "grad_norm": 3.9317402839660645, "learning_rate": 9.638626934585314e-06, "loss": 1.0944, "step": 1922 }, { "epoch": 0.14824236817761333, "grad_norm": 3.948791265487671, "learning_rate": 9.638160790260606e-06, "loss": 1.0707, "step": 1923 }, { "epoch": 0.14831945729263027, "grad_norm": 4.429360389709473, "learning_rate": 9.637694356770595e-06, "loss": 1.1624, "step": 1924 }, { "epoch": 0.14839654640764724, "grad_norm": 4.858312129974365, "learning_rate": 9.637227634144359e-06, "loss": 0.995, "step": 1925 }, { "epoch": 0.1484736355226642, "grad_norm": 3.9074039459228516, "learning_rate": 9.636760622410997e-06, "loss": 1.1135, "step": 1926 }, { "epoch": 0.14855072463768115, "grad_norm": 3.9309797286987305, "learning_rate": 9.636293321599625e-06, "loss": 1.0696, "step": 1927 }, { "epoch": 0.14862781375269812, "grad_norm": 3.8177499771118164, "learning_rate": 9.635825731739376e-06, "loss": 1.0943, "step": 1928 }, { "epoch": 0.14870490286771507, "grad_norm": 4.232987880706787, "learning_rate": 9.635357852859404e-06, "loss": 1.0883, "step": 1929 }, { "epoch": 0.14878199198273204, "grad_norm": 4.08406400680542, "learning_rate": 9.634889684988875e-06, "loss": 1.1446, "step": 1930 }, { "epoch": 0.148859081097749, "grad_norm": 4.512781620025635, "learning_rate": 9.63442122815698e-06, "loss": 1.1857, "step": 1931 }, { "epoch": 0.14893617021276595, "grad_norm": 4.197699069976807, "learning_rate": 9.633952482392924e-06, "loss": 1.0965, "step": 1932 }, { "epoch": 0.14901325932778292, "grad_norm": 3.925628185272217, "learning_rate": 9.63348344772593e-06, "loss": 1.1148, "step": 1933 }, { "epoch": 0.14909034844279986, "grad_norm": 4.278095245361328, "learning_rate": 9.63301412418524e-06, "loss": 1.0734, "step": 1934 }, { "epoch": 0.14916743755781683, "grad_norm": 4.0269269943237305, "learning_rate": 9.632544511800114e-06, "loss": 1.1376, "step": 1935 }, { "epoch": 0.1492445266728338, "grad_norm": 3.731536626815796, "learning_rate": 9.63207461059983e-06, "loss": 1.0154, "step": 1936 }, { "epoch": 0.14932161578785075, "grad_norm": 3.8483800888061523, "learning_rate": 9.631604420613685e-06, "loss": 1.0973, "step": 1937 }, { "epoch": 0.14939870490286772, "grad_norm": 
3.9684271812438965, "learning_rate": 9.631133941870993e-06, "loss": 1.0039, "step": 1938 }, { "epoch": 0.14947579401788466, "grad_norm": 4.196876049041748, "learning_rate": 9.630663174401085e-06, "loss": 1.2552, "step": 1939 }, { "epoch": 0.14955288313290163, "grad_norm": 4.365589141845703, "learning_rate": 9.630192118233309e-06, "loss": 1.0885, "step": 1940 }, { "epoch": 0.1496299722479186, "grad_norm": 3.9205033779144287, "learning_rate": 9.629720773397036e-06, "loss": 1.0257, "step": 1941 }, { "epoch": 0.14970706136293555, "grad_norm": 3.8212361335754395, "learning_rate": 9.62924913992165e-06, "loss": 1.0147, "step": 1942 }, { "epoch": 0.14978415047795252, "grad_norm": 4.525760650634766, "learning_rate": 9.628777217836558e-06, "loss": 1.1392, "step": 1943 }, { "epoch": 0.1498612395929695, "grad_norm": 4.177234172821045, "learning_rate": 9.628305007171177e-06, "loss": 1.2419, "step": 1944 }, { "epoch": 0.14993832870798643, "grad_norm": 4.351520538330078, "learning_rate": 9.627832507954949e-06, "loss": 1.1474, "step": 1945 }, { "epoch": 0.1500154178230034, "grad_norm": 4.03515625, "learning_rate": 9.627359720217334e-06, "loss": 1.0986, "step": 1946 }, { "epoch": 0.15009250693802034, "grad_norm": 3.882413387298584, "learning_rate": 9.626886643987806e-06, "loss": 1.1141, "step": 1947 }, { "epoch": 0.1501695960530373, "grad_norm": 4.117105007171631, "learning_rate": 9.626413279295859e-06, "loss": 1.1688, "step": 1948 }, { "epoch": 0.15024668516805428, "grad_norm": 3.7657737731933594, "learning_rate": 9.625939626171004e-06, "loss": 1.0316, "step": 1949 }, { "epoch": 0.15032377428307123, "grad_norm": 4.3290019035339355, "learning_rate": 9.625465684642773e-06, "loss": 1.1699, "step": 1950 }, { "epoch": 0.1504008633980882, "grad_norm": 4.567865371704102, "learning_rate": 9.624991454740709e-06, "loss": 1.1349, "step": 1951 }, { "epoch": 0.15047795251310514, "grad_norm": 3.6431381702423096, "learning_rate": 9.624516936494385e-06, "loss": 0.9967, "step": 1952 }, { "epoch": 0.1505550416281221, "grad_norm": 4.145571231842041, "learning_rate": 9.62404212993338e-06, "loss": 1.1052, "step": 1953 }, { "epoch": 0.15063213074313908, "grad_norm": 3.933396339416504, "learning_rate": 9.623567035087295e-06, "loss": 1.0349, "step": 1954 }, { "epoch": 0.15070921985815602, "grad_norm": 4.341703414916992, "learning_rate": 9.623091651985754e-06, "loss": 1.164, "step": 1955 }, { "epoch": 0.150786308973173, "grad_norm": 3.696288585662842, "learning_rate": 9.622615980658391e-06, "loss": 1.0052, "step": 1956 }, { "epoch": 0.15086339808818994, "grad_norm": 4.249678611755371, "learning_rate": 9.622140021134863e-06, "loss": 1.1709, "step": 1957 }, { "epoch": 0.1509404872032069, "grad_norm": 4.261125087738037, "learning_rate": 9.621663773444843e-06, "loss": 1.0577, "step": 1958 }, { "epoch": 0.15101757631822388, "grad_norm": 3.759453296661377, "learning_rate": 9.621187237618024e-06, "loss": 1.0644, "step": 1959 }, { "epoch": 0.15109466543324082, "grad_norm": 4.062593460083008, "learning_rate": 9.620710413684112e-06, "loss": 1.1486, "step": 1960 }, { "epoch": 0.1511717545482578, "grad_norm": 4.127292156219482, "learning_rate": 9.62023330167284e-06, "loss": 1.0529, "step": 1961 }, { "epoch": 0.15124884366327473, "grad_norm": 4.187131881713867, "learning_rate": 9.619755901613947e-06, "loss": 1.0464, "step": 1962 }, { "epoch": 0.1513259327782917, "grad_norm": 3.996009111404419, "learning_rate": 9.619278213537202e-06, "loss": 1.1509, "step": 1963 }, { "epoch": 0.15140302189330868, "grad_norm": 4.108388423919678, 
"learning_rate": 9.618800237472385e-06, "loss": 1.0326, "step": 1964 }, { "epoch": 0.15148011100832562, "grad_norm": 3.868997573852539, "learning_rate": 9.618321973449294e-06, "loss": 1.0271, "step": 1965 }, { "epoch": 0.1515572001233426, "grad_norm": 4.561322212219238, "learning_rate": 9.617843421497746e-06, "loss": 1.0995, "step": 1966 }, { "epoch": 0.15163428923835953, "grad_norm": 3.980226755142212, "learning_rate": 9.617364581647578e-06, "loss": 0.983, "step": 1967 }, { "epoch": 0.1517113783533765, "grad_norm": 4.084739685058594, "learning_rate": 9.616885453928641e-06, "loss": 1.257, "step": 1968 }, { "epoch": 0.15178846746839347, "grad_norm": 4.914328575134277, "learning_rate": 9.616406038370809e-06, "loss": 1.1405, "step": 1969 }, { "epoch": 0.15186555658341042, "grad_norm": 3.754298210144043, "learning_rate": 9.615926335003968e-06, "loss": 1.0765, "step": 1970 }, { "epoch": 0.1519426456984274, "grad_norm": 3.9111487865448, "learning_rate": 9.615446343858028e-06, "loss": 1.0048, "step": 1971 }, { "epoch": 0.15201973481344433, "grad_norm": 4.179695129394531, "learning_rate": 9.614966064962911e-06, "loss": 1.0866, "step": 1972 }, { "epoch": 0.1520968239284613, "grad_norm": 3.932072877883911, "learning_rate": 9.614485498348563e-06, "loss": 1.0682, "step": 1973 }, { "epoch": 0.15217391304347827, "grad_norm": 4.217230796813965, "learning_rate": 9.614004644044943e-06, "loss": 1.1172, "step": 1974 }, { "epoch": 0.1522510021584952, "grad_norm": 4.254328727722168, "learning_rate": 9.613523502082029e-06, "loss": 1.1907, "step": 1975 }, { "epoch": 0.15232809127351218, "grad_norm": 4.149200916290283, "learning_rate": 9.613042072489819e-06, "loss": 1.1092, "step": 1976 }, { "epoch": 0.15240518038852913, "grad_norm": 4.106222152709961, "learning_rate": 9.612560355298328e-06, "loss": 1.103, "step": 1977 }, { "epoch": 0.1524822695035461, "grad_norm": 3.983444929122925, "learning_rate": 9.612078350537586e-06, "loss": 1.0695, "step": 1978 }, { "epoch": 0.15255935861856307, "grad_norm": 4.1216959953308105, "learning_rate": 9.611596058237647e-06, "loss": 1.1196, "step": 1979 }, { "epoch": 0.15263644773358, "grad_norm": 3.9716129302978516, "learning_rate": 9.611113478428577e-06, "loss": 1.2409, "step": 1980 }, { "epoch": 0.15271353684859698, "grad_norm": 3.9019081592559814, "learning_rate": 9.610630611140464e-06, "loss": 0.9612, "step": 1981 }, { "epoch": 0.15279062596361392, "grad_norm": 4.146617889404297, "learning_rate": 9.610147456403412e-06, "loss": 1.0516, "step": 1982 }, { "epoch": 0.1528677150786309, "grad_norm": 3.6473166942596436, "learning_rate": 9.609664014247542e-06, "loss": 1.0253, "step": 1983 }, { "epoch": 0.15294480419364787, "grad_norm": 4.137308120727539, "learning_rate": 9.609180284702994e-06, "loss": 1.0064, "step": 1984 }, { "epoch": 0.1530218933086648, "grad_norm": 3.682206869125366, "learning_rate": 9.608696267799928e-06, "loss": 1.1105, "step": 1985 }, { "epoch": 0.15309898242368178, "grad_norm": 3.6087026596069336, "learning_rate": 9.608211963568518e-06, "loss": 0.973, "step": 1986 }, { "epoch": 0.15317607153869875, "grad_norm": 4.12633752822876, "learning_rate": 9.60772737203896e-06, "loss": 1.1194, "step": 1987 }, { "epoch": 0.1532531606537157, "grad_norm": 3.913084030151367, "learning_rate": 9.607242493241463e-06, "loss": 1.1383, "step": 1988 }, { "epoch": 0.15333024976873266, "grad_norm": 4.2011003494262695, "learning_rate": 9.606757327206258e-06, "loss": 1.1338, "step": 1989 }, { "epoch": 0.1534073388837496, "grad_norm": 4.860283851623535, "learning_rate": 
9.606271873963591e-06, "loss": 0.9991, "step": 1990 }, { "epoch": 0.15348442799876658, "grad_norm": 3.819664716720581, "learning_rate": 9.605786133543732e-06, "loss": 1.1397, "step": 1991 }, { "epoch": 0.15356151711378355, "grad_norm": 4.044528961181641, "learning_rate": 9.60530010597696e-06, "loss": 1.0723, "step": 1992 }, { "epoch": 0.1536386062288005, "grad_norm": 3.8798742294311523, "learning_rate": 9.604813791293579e-06, "loss": 1.0981, "step": 1993 }, { "epoch": 0.15371569534381746, "grad_norm": 4.1378703117370605, "learning_rate": 9.604327189523906e-06, "loss": 1.1575, "step": 1994 }, { "epoch": 0.1537927844588344, "grad_norm": 5.460372447967529, "learning_rate": 9.60384030069828e-06, "loss": 1.1, "step": 1995 }, { "epoch": 0.15386987357385137, "grad_norm": 4.201817035675049, "learning_rate": 9.603353124847054e-06, "loss": 1.0258, "step": 1996 }, { "epoch": 0.15394696268886834, "grad_norm": 3.7332353591918945, "learning_rate": 9.602865662000604e-06, "loss": 1.1201, "step": 1997 }, { "epoch": 0.1540240518038853, "grad_norm": 3.7747762203216553, "learning_rate": 9.602377912189319e-06, "loss": 1.0083, "step": 1998 }, { "epoch": 0.15410114091890226, "grad_norm": 4.299161434173584, "learning_rate": 9.601889875443605e-06, "loss": 0.969, "step": 1999 }, { "epoch": 0.1541782300339192, "grad_norm": 4.0587263107299805, "learning_rate": 9.601401551793891e-06, "loss": 1.0928, "step": 2000 }, { "epoch": 0.15425531914893617, "grad_norm": 3.8370776176452637, "learning_rate": 9.600912941270624e-06, "loss": 1.1446, "step": 2001 }, { "epoch": 0.15433240826395314, "grad_norm": 3.871699333190918, "learning_rate": 9.600424043904263e-06, "loss": 1.1099, "step": 2002 }, { "epoch": 0.15440949737897008, "grad_norm": 4.0479230880737305, "learning_rate": 9.599934859725288e-06, "loss": 1.1099, "step": 2003 }, { "epoch": 0.15448658649398705, "grad_norm": 4.1467485427856445, "learning_rate": 9.599445388764199e-06, "loss": 1.0611, "step": 2004 }, { "epoch": 0.154563675609004, "grad_norm": 3.658740997314453, "learning_rate": 9.598955631051512e-06, "loss": 1.0018, "step": 2005 }, { "epoch": 0.15464076472402097, "grad_norm": 4.59723424911499, "learning_rate": 9.598465586617757e-06, "loss": 1.1884, "step": 2006 }, { "epoch": 0.15471785383903794, "grad_norm": 3.7733352184295654, "learning_rate": 9.597975255493492e-06, "loss": 1.0512, "step": 2007 }, { "epoch": 0.15479494295405488, "grad_norm": 4.303390026092529, "learning_rate": 9.597484637709282e-06, "loss": 1.0479, "step": 2008 }, { "epoch": 0.15487203206907185, "grad_norm": 3.650953769683838, "learning_rate": 9.596993733295717e-06, "loss": 0.9842, "step": 2009 }, { "epoch": 0.1549491211840888, "grad_norm": 3.5866963863372803, "learning_rate": 9.596502542283399e-06, "loss": 1.1231, "step": 2010 }, { "epoch": 0.15502621029910577, "grad_norm": 3.8881676197052, "learning_rate": 9.596011064702954e-06, "loss": 1.1773, "step": 2011 }, { "epoch": 0.15510329941412274, "grad_norm": 3.849320650100708, "learning_rate": 9.595519300585024e-06, "loss": 1.0562, "step": 2012 }, { "epoch": 0.15518038852913968, "grad_norm": 3.8360087871551514, "learning_rate": 9.595027249960264e-06, "loss": 1.17, "step": 2013 }, { "epoch": 0.15525747764415665, "grad_norm": 3.9184608459472656, "learning_rate": 9.594534912859356e-06, "loss": 1.0578, "step": 2014 }, { "epoch": 0.1553345667591736, "grad_norm": 4.464005470275879, "learning_rate": 9.59404228931299e-06, "loss": 1.0775, "step": 2015 }, { "epoch": 0.15541165587419056, "grad_norm": 3.8075225353240967, "learning_rate": 9.59354937935188e-06, 
"loss": 0.9951, "step": 2016 }, { "epoch": 0.15548874498920753, "grad_norm": 3.766676902770996, "learning_rate": 9.59305618300676e-06, "loss": 1.1488, "step": 2017 }, { "epoch": 0.15556583410422448, "grad_norm": 3.7311699390411377, "learning_rate": 9.592562700308372e-06, "loss": 1.1167, "step": 2018 }, { "epoch": 0.15564292321924145, "grad_norm": 4.18145751953125, "learning_rate": 9.592068931287486e-06, "loss": 1.1209, "step": 2019 }, { "epoch": 0.1557200123342584, "grad_norm": 3.6943624019622803, "learning_rate": 9.591574875974884e-06, "loss": 1.0483, "step": 2020 }, { "epoch": 0.15579710144927536, "grad_norm": 3.553483009338379, "learning_rate": 9.591080534401371e-06, "loss": 1.0577, "step": 2021 }, { "epoch": 0.15587419056429233, "grad_norm": 3.7024497985839844, "learning_rate": 9.590585906597764e-06, "loss": 1.0646, "step": 2022 }, { "epoch": 0.15595127967930927, "grad_norm": 4.6477837562561035, "learning_rate": 9.590090992594901e-06, "loss": 1.0384, "step": 2023 }, { "epoch": 0.15602836879432624, "grad_norm": 3.604308843612671, "learning_rate": 9.589595792423636e-06, "loss": 0.9234, "step": 2024 }, { "epoch": 0.1561054579093432, "grad_norm": 4.46380090713501, "learning_rate": 9.589100306114842e-06, "loss": 1.1194, "step": 2025 }, { "epoch": 0.15618254702436016, "grad_norm": 4.057259559631348, "learning_rate": 9.588604533699415e-06, "loss": 1.1086, "step": 2026 }, { "epoch": 0.15625963613937713, "grad_norm": 3.9798672199249268, "learning_rate": 9.58810847520826e-06, "loss": 1.0903, "step": 2027 }, { "epoch": 0.15633672525439407, "grad_norm": 4.816097259521484, "learning_rate": 9.587612130672302e-06, "loss": 1.107, "step": 2028 }, { "epoch": 0.15641381436941104, "grad_norm": 4.069070339202881, "learning_rate": 9.587115500122489e-06, "loss": 1.0659, "step": 2029 }, { "epoch": 0.156490903484428, "grad_norm": 4.3908796310424805, "learning_rate": 9.58661858358978e-06, "loss": 0.9479, "step": 2030 }, { "epoch": 0.15656799259944496, "grad_norm": 3.6012563705444336, "learning_rate": 9.586121381105158e-06, "loss": 1.0304, "step": 2031 }, { "epoch": 0.15664508171446193, "grad_norm": 4.042422771453857, "learning_rate": 9.58562389269962e-06, "loss": 1.08, "step": 2032 }, { "epoch": 0.15672217082947887, "grad_norm": 4.079514980316162, "learning_rate": 9.585126118404183e-06, "loss": 1.1668, "step": 2033 }, { "epoch": 0.15679925994449584, "grad_norm": 3.817542314529419, "learning_rate": 9.584628058249878e-06, "loss": 1.0957, "step": 2034 }, { "epoch": 0.1568763490595128, "grad_norm": 4.3651604652404785, "learning_rate": 9.584129712267759e-06, "loss": 1.0863, "step": 2035 }, { "epoch": 0.15695343817452975, "grad_norm": 4.226108074188232, "learning_rate": 9.583631080488893e-06, "loss": 1.0109, "step": 2036 }, { "epoch": 0.15703052728954672, "grad_norm": 3.7006771564483643, "learning_rate": 9.58313216294437e-06, "loss": 0.9356, "step": 2037 }, { "epoch": 0.15710761640456367, "grad_norm": 4.0176100730896, "learning_rate": 9.582632959665293e-06, "loss": 1.0112, "step": 2038 }, { "epoch": 0.15718470551958064, "grad_norm": 4.495166301727295, "learning_rate": 9.582133470682785e-06, "loss": 1.149, "step": 2039 }, { "epoch": 0.1572617946345976, "grad_norm": 4.30600643157959, "learning_rate": 9.581633696027986e-06, "loss": 1.0351, "step": 2040 }, { "epoch": 0.15733888374961455, "grad_norm": 4.271859645843506, "learning_rate": 9.581133635732053e-06, "loss": 1.2107, "step": 2041 }, { "epoch": 0.15741597286463152, "grad_norm": 4.35610818862915, "learning_rate": 9.580633289826166e-06, "loss": 1.2124, "step": 2042 
}, { "epoch": 0.15749306197964846, "grad_norm": 3.801640272140503, "learning_rate": 9.580132658341519e-06, "loss": 1.0812, "step": 2043 }, { "epoch": 0.15757015109466543, "grad_norm": 3.8817687034606934, "learning_rate": 9.579631741309319e-06, "loss": 1.1933, "step": 2044 }, { "epoch": 0.1576472402096824, "grad_norm": 3.9286177158355713, "learning_rate": 9.5791305387608e-06, "loss": 1.0202, "step": 2045 }, { "epoch": 0.15772432932469935, "grad_norm": 4.174068450927734, "learning_rate": 9.578629050727208e-06, "loss": 1.1846, "step": 2046 }, { "epoch": 0.15780141843971632, "grad_norm": 4.3177409172058105, "learning_rate": 9.578127277239807e-06, "loss": 1.0551, "step": 2047 }, { "epoch": 0.15787850755473326, "grad_norm": 4.210177898406982, "learning_rate": 9.577625218329882e-06, "loss": 1.0957, "step": 2048 }, { "epoch": 0.15795559666975023, "grad_norm": 4.341221332550049, "learning_rate": 9.577122874028733e-06, "loss": 1.133, "step": 2049 }, { "epoch": 0.1580326857847672, "grad_norm": 4.481259822845459, "learning_rate": 9.576620244367676e-06, "loss": 1.1467, "step": 2050 }, { "epoch": 0.15810977489978414, "grad_norm": 4.296179294586182, "learning_rate": 9.576117329378051e-06, "loss": 1.0854, "step": 2051 }, { "epoch": 0.15818686401480112, "grad_norm": 4.161882400512695, "learning_rate": 9.575614129091211e-06, "loss": 1.099, "step": 2052 }, { "epoch": 0.15826395312981806, "grad_norm": 4.072216033935547, "learning_rate": 9.575110643538528e-06, "loss": 1.1539, "step": 2053 }, { "epoch": 0.15834104224483503, "grad_norm": 3.7138614654541016, "learning_rate": 9.574606872751391e-06, "loss": 1.1287, "step": 2054 }, { "epoch": 0.158418131359852, "grad_norm": 4.442841529846191, "learning_rate": 9.574102816761209e-06, "loss": 1.12, "step": 2055 }, { "epoch": 0.15849522047486894, "grad_norm": 4.6196393966674805, "learning_rate": 9.573598475599405e-06, "loss": 1.0678, "step": 2056 }, { "epoch": 0.1585723095898859, "grad_norm": 3.877990961074829, "learning_rate": 9.573093849297423e-06, "loss": 0.9528, "step": 2057 }, { "epoch": 0.15864939870490286, "grad_norm": 4.252166748046875, "learning_rate": 9.572588937886727e-06, "loss": 1.0336, "step": 2058 }, { "epoch": 0.15872648781991983, "grad_norm": 3.8500077724456787, "learning_rate": 9.57208374139879e-06, "loss": 1.1263, "step": 2059 }, { "epoch": 0.1588035769349368, "grad_norm": 3.8318703174591064, "learning_rate": 9.571578259865112e-06, "loss": 1.0754, "step": 2060 }, { "epoch": 0.15888066604995374, "grad_norm": 4.030394554138184, "learning_rate": 9.571072493317207e-06, "loss": 1.0642, "step": 2061 }, { "epoch": 0.1589577551649707, "grad_norm": 4.103557109832764, "learning_rate": 9.570566441786605e-06, "loss": 1.0528, "step": 2062 }, { "epoch": 0.15903484427998765, "grad_norm": 3.8708770275115967, "learning_rate": 9.570060105304856e-06, "loss": 1.0407, "step": 2063 }, { "epoch": 0.15911193339500462, "grad_norm": 4.086610794067383, "learning_rate": 9.569553483903531e-06, "loss": 1.1052, "step": 2064 }, { "epoch": 0.1591890225100216, "grad_norm": 3.6814303398132324, "learning_rate": 9.569046577614212e-06, "loss": 1.0038, "step": 2065 }, { "epoch": 0.15926611162503854, "grad_norm": 4.457508087158203, "learning_rate": 9.568539386468501e-06, "loss": 1.1407, "step": 2066 }, { "epoch": 0.1593432007400555, "grad_norm": 3.961879253387451, "learning_rate": 9.568031910498021e-06, "loss": 1.18, "step": 2067 }, { "epoch": 0.15942028985507245, "grad_norm": 4.2065253257751465, "learning_rate": 9.56752414973441e-06, "loss": 0.9892, "step": 2068 }, { "epoch": 
0.15949737897008942, "grad_norm": 3.6328790187835693, "learning_rate": 9.567016104209326e-06, "loss": 1.0919, "step": 2069 }, { "epoch": 0.1595744680851064, "grad_norm": 3.8659377098083496, "learning_rate": 9.56650777395444e-06, "loss": 1.0576, "step": 2070 }, { "epoch": 0.15965155720012333, "grad_norm": 4.030930995941162, "learning_rate": 9.565999159001442e-06, "loss": 0.9824, "step": 2071 }, { "epoch": 0.1597286463151403, "grad_norm": 4.241566181182861, "learning_rate": 9.565490259382047e-06, "loss": 1.0389, "step": 2072 }, { "epoch": 0.15980573543015728, "grad_norm": 4.0185089111328125, "learning_rate": 9.564981075127979e-06, "loss": 1.0236, "step": 2073 }, { "epoch": 0.15988282454517422, "grad_norm": 4.9292826652526855, "learning_rate": 9.564471606270985e-06, "loss": 1.0604, "step": 2074 }, { "epoch": 0.1599599136601912, "grad_norm": 4.441895008087158, "learning_rate": 9.563961852842824e-06, "loss": 1.0312, "step": 2075 }, { "epoch": 0.16003700277520813, "grad_norm": 3.789350748062134, "learning_rate": 9.56345181487528e-06, "loss": 1.073, "step": 2076 }, { "epoch": 0.1601140918902251, "grad_norm": 4.255967617034912, "learning_rate": 9.562941492400149e-06, "loss": 1.039, "step": 2077 }, { "epoch": 0.16019118100524207, "grad_norm": 3.9734389781951904, "learning_rate": 9.56243088544925e-06, "loss": 1.0608, "step": 2078 }, { "epoch": 0.16026827012025902, "grad_norm": 4.481680870056152, "learning_rate": 9.561919994054414e-06, "loss": 1.1822, "step": 2079 }, { "epoch": 0.16034535923527599, "grad_norm": 4.583976745605469, "learning_rate": 9.561408818247493e-06, "loss": 1.1088, "step": 2080 }, { "epoch": 0.16042244835029293, "grad_norm": 3.912966012954712, "learning_rate": 9.560897358060355e-06, "loss": 1.0697, "step": 2081 }, { "epoch": 0.1604995374653099, "grad_norm": 3.9257588386535645, "learning_rate": 9.56038561352489e-06, "loss": 1.0764, "step": 2082 }, { "epoch": 0.16057662658032687, "grad_norm": 3.7236075401306152, "learning_rate": 9.559873584673e-06, "loss": 1.0295, "step": 2083 }, { "epoch": 0.1606537156953438, "grad_norm": 4.096950531005859, "learning_rate": 9.55936127153661e-06, "loss": 1.0696, "step": 2084 }, { "epoch": 0.16073080481036078, "grad_norm": 3.90035080909729, "learning_rate": 9.558848674147657e-06, "loss": 1.1163, "step": 2085 }, { "epoch": 0.16080789392537773, "grad_norm": 3.8980672359466553, "learning_rate": 9.558335792538099e-06, "loss": 1.0165, "step": 2086 }, { "epoch": 0.1608849830403947, "grad_norm": 4.154537200927734, "learning_rate": 9.557822626739912e-06, "loss": 1.0332, "step": 2087 }, { "epoch": 0.16096207215541167, "grad_norm": 3.6484971046447754, "learning_rate": 9.557309176785092e-06, "loss": 1.011, "step": 2088 }, { "epoch": 0.1610391612704286, "grad_norm": 4.025908470153809, "learning_rate": 9.556795442705647e-06, "loss": 1.0423, "step": 2089 }, { "epoch": 0.16111625038544558, "grad_norm": 4.023404598236084, "learning_rate": 9.556281424533606e-06, "loss": 1.0278, "step": 2090 }, { "epoch": 0.16119333950046252, "grad_norm": 4.211247444152832, "learning_rate": 9.555767122301016e-06, "loss": 1.0354, "step": 2091 }, { "epoch": 0.1612704286154795, "grad_norm": 4.087972640991211, "learning_rate": 9.55525253603994e-06, "loss": 1.0494, "step": 2092 }, { "epoch": 0.16134751773049646, "grad_norm": 3.722517728805542, "learning_rate": 9.554737665782464e-06, "loss": 1.0458, "step": 2093 }, { "epoch": 0.1614246068455134, "grad_norm": 3.7930004596710205, "learning_rate": 9.55422251156068e-06, "loss": 1.0477, "step": 2094 }, { "epoch": 0.16150169596053038, 
"grad_norm": 3.9096274375915527, "learning_rate": 9.55370707340671e-06, "loss": 1.1077, "step": 2095 }, { "epoch": 0.16157878507554732, "grad_norm": 3.9330289363861084, "learning_rate": 9.553191351352691e-06, "loss": 0.9962, "step": 2096 }, { "epoch": 0.1616558741905643, "grad_norm": 4.029851913452148, "learning_rate": 9.55267534543077e-06, "loss": 1.138, "step": 2097 }, { "epoch": 0.16173296330558126, "grad_norm": 4.2497334480285645, "learning_rate": 9.552159055673122e-06, "loss": 1.0651, "step": 2098 }, { "epoch": 0.1618100524205982, "grad_norm": 3.8570566177368164, "learning_rate": 9.551642482111931e-06, "loss": 1.035, "step": 2099 }, { "epoch": 0.16188714153561518, "grad_norm": 4.435809135437012, "learning_rate": 9.551125624779407e-06, "loss": 1.1124, "step": 2100 }, { "epoch": 0.16196423065063212, "grad_norm": 4.071799278259277, "learning_rate": 9.55060848370777e-06, "loss": 1.1761, "step": 2101 }, { "epoch": 0.1620413197656491, "grad_norm": 3.8368101119995117, "learning_rate": 9.550091058929264e-06, "loss": 1.1273, "step": 2102 }, { "epoch": 0.16211840888066606, "grad_norm": 4.056320667266846, "learning_rate": 9.549573350476142e-06, "loss": 1.1069, "step": 2103 }, { "epoch": 0.162195497995683, "grad_norm": 4.4123148918151855, "learning_rate": 9.549055358380687e-06, "loss": 1.0627, "step": 2104 }, { "epoch": 0.16227258711069997, "grad_norm": 3.741130828857422, "learning_rate": 9.548537082675191e-06, "loss": 1.1405, "step": 2105 }, { "epoch": 0.16234967622571692, "grad_norm": 3.8030614852905273, "learning_rate": 9.548018523391965e-06, "loss": 1.1287, "step": 2106 }, { "epoch": 0.1624267653407339, "grad_norm": 3.879117727279663, "learning_rate": 9.54749968056334e-06, "loss": 1.035, "step": 2107 }, { "epoch": 0.16250385445575086, "grad_norm": 3.7147738933563232, "learning_rate": 9.546980554221659e-06, "loss": 0.9949, "step": 2108 }, { "epoch": 0.1625809435707678, "grad_norm": 4.359158515930176, "learning_rate": 9.546461144399293e-06, "loss": 1.069, "step": 2109 }, { "epoch": 0.16265803268578477, "grad_norm": 3.803569793701172, "learning_rate": 9.54594145112862e-06, "loss": 1.0991, "step": 2110 }, { "epoch": 0.1627351218008017, "grad_norm": 3.9304962158203125, "learning_rate": 9.545421474442042e-06, "loss": 1.0607, "step": 2111 }, { "epoch": 0.16281221091581868, "grad_norm": 4.038872718811035, "learning_rate": 9.544901214371976e-06, "loss": 1.0551, "step": 2112 }, { "epoch": 0.16288930003083565, "grad_norm": 4.135643482208252, "learning_rate": 9.544380670950857e-06, "loss": 1.0892, "step": 2113 }, { "epoch": 0.1629663891458526, "grad_norm": 4.121045112609863, "learning_rate": 9.543859844211141e-06, "loss": 1.0992, "step": 2114 }, { "epoch": 0.16304347826086957, "grad_norm": 3.6090307235717773, "learning_rate": 9.543338734185295e-06, "loss": 1.0014, "step": 2115 }, { "epoch": 0.16312056737588654, "grad_norm": 3.578199625015259, "learning_rate": 9.54281734090581e-06, "loss": 1.0165, "step": 2116 }, { "epoch": 0.16319765649090348, "grad_norm": 4.004699230194092, "learning_rate": 9.542295664405193e-06, "loss": 1.1468, "step": 2117 }, { "epoch": 0.16327474560592045, "grad_norm": 3.9040887355804443, "learning_rate": 9.541773704715966e-06, "loss": 1.0745, "step": 2118 }, { "epoch": 0.1633518347209374, "grad_norm": 3.8351752758026123, "learning_rate": 9.541251461870672e-06, "loss": 1.1056, "step": 2119 }, { "epoch": 0.16342892383595437, "grad_norm": 3.704251527786255, "learning_rate": 9.540728935901867e-06, "loss": 0.9895, "step": 2120 }, { "epoch": 0.16350601295097134, "grad_norm": 
4.368956565856934, "learning_rate": 9.540206126842129e-06, "loss": 1.0922, "step": 2121 }, { "epoch": 0.16358310206598828, "grad_norm": 3.9333720207214355, "learning_rate": 9.539683034724054e-06, "loss": 1.1807, "step": 2122 }, { "epoch": 0.16366019118100525, "grad_norm": 3.7927298545837402, "learning_rate": 9.539159659580254e-06, "loss": 1.0409, "step": 2123 }, { "epoch": 0.1637372802960222, "grad_norm": 4.061433792114258, "learning_rate": 9.53863600144336e-06, "loss": 1.1204, "step": 2124 }, { "epoch": 0.16381436941103916, "grad_norm": 3.5982680320739746, "learning_rate": 9.538112060346016e-06, "loss": 1.157, "step": 2125 }, { "epoch": 0.16389145852605613, "grad_norm": 4.211023330688477, "learning_rate": 9.537587836320887e-06, "loss": 1.0758, "step": 2126 }, { "epoch": 0.16396854764107308, "grad_norm": 3.861417055130005, "learning_rate": 9.53706332940066e-06, "loss": 1.1376, "step": 2127 }, { "epoch": 0.16404563675609005, "grad_norm": 3.5378191471099854, "learning_rate": 9.536538539618031e-06, "loss": 1.0481, "step": 2128 }, { "epoch": 0.164122725871107, "grad_norm": 3.9547457695007324, "learning_rate": 9.53601346700572e-06, "loss": 1.0943, "step": 2129 }, { "epoch": 0.16419981498612396, "grad_norm": 3.9615726470947266, "learning_rate": 9.53548811159646e-06, "loss": 1.0865, "step": 2130 }, { "epoch": 0.16427690410114093, "grad_norm": 3.7515220642089844, "learning_rate": 9.534962473423008e-06, "loss": 1.1495, "step": 2131 }, { "epoch": 0.16435399321615787, "grad_norm": 4.2690534591674805, "learning_rate": 9.534436552518134e-06, "loss": 1.0762, "step": 2132 }, { "epoch": 0.16443108233117484, "grad_norm": 4.034238338470459, "learning_rate": 9.533910348914624e-06, "loss": 1.2103, "step": 2133 }, { "epoch": 0.1645081714461918, "grad_norm": 3.70621395111084, "learning_rate": 9.533383862645285e-06, "loss": 1.0361, "step": 2134 }, { "epoch": 0.16458526056120876, "grad_norm": 4.004809856414795, "learning_rate": 9.532857093742941e-06, "loss": 1.1349, "step": 2135 }, { "epoch": 0.16466234967622573, "grad_norm": 4.048299312591553, "learning_rate": 9.532330042240434e-06, "loss": 1.1853, "step": 2136 }, { "epoch": 0.16473943879124267, "grad_norm": 4.68448543548584, "learning_rate": 9.531802708170623e-06, "loss": 1.0591, "step": 2137 }, { "epoch": 0.16481652790625964, "grad_norm": 3.5135207176208496, "learning_rate": 9.531275091566384e-06, "loss": 0.9884, "step": 2138 }, { "epoch": 0.16489361702127658, "grad_norm": 4.260873317718506, "learning_rate": 9.530747192460609e-06, "loss": 1.1469, "step": 2139 }, { "epoch": 0.16497070613629355, "grad_norm": 3.82432222366333, "learning_rate": 9.530219010886214e-06, "loss": 1.1344, "step": 2140 }, { "epoch": 0.16504779525131053, "grad_norm": 4.045070171356201, "learning_rate": 9.529690546876125e-06, "loss": 1.0296, "step": 2141 }, { "epoch": 0.16512488436632747, "grad_norm": 4.116434574127197, "learning_rate": 9.529161800463291e-06, "loss": 1.1479, "step": 2142 }, { "epoch": 0.16520197348134444, "grad_norm": 3.7906370162963867, "learning_rate": 9.528632771680677e-06, "loss": 1.0819, "step": 2143 }, { "epoch": 0.16527906259636138, "grad_norm": 4.043308734893799, "learning_rate": 9.528103460561262e-06, "loss": 1.1089, "step": 2144 }, { "epoch": 0.16535615171137835, "grad_norm": 4.255034446716309, "learning_rate": 9.52757386713805e-06, "loss": 1.0368, "step": 2145 }, { "epoch": 0.16543324082639532, "grad_norm": 3.905506134033203, "learning_rate": 9.527043991444053e-06, "loss": 0.9421, "step": 2146 }, { "epoch": 0.16551032994141227, "grad_norm": 4.145698070526123, 
"learning_rate": 9.526513833512312e-06, "loss": 1.1444, "step": 2147 }, { "epoch": 0.16558741905642924, "grad_norm": 3.9307868480682373, "learning_rate": 9.525983393375877e-06, "loss": 1.1171, "step": 2148 }, { "epoch": 0.16566450817144618, "grad_norm": 3.6001784801483154, "learning_rate": 9.525452671067816e-06, "loss": 0.9414, "step": 2149 }, { "epoch": 0.16574159728646315, "grad_norm": 4.438070774078369, "learning_rate": 9.524921666621221e-06, "loss": 1.1429, "step": 2150 }, { "epoch": 0.16581868640148012, "grad_norm": 3.5969676971435547, "learning_rate": 9.524390380069195e-06, "loss": 0.97, "step": 2151 }, { "epoch": 0.16589577551649706, "grad_norm": 4.561358451843262, "learning_rate": 9.52385881144486e-06, "loss": 1.0779, "step": 2152 }, { "epoch": 0.16597286463151403, "grad_norm": 3.637179136276245, "learning_rate": 9.52332696078136e-06, "loss": 0.9986, "step": 2153 }, { "epoch": 0.16604995374653098, "grad_norm": 3.7705628871917725, "learning_rate": 9.522794828111849e-06, "loss": 1.0871, "step": 2154 }, { "epoch": 0.16612704286154795, "grad_norm": 3.748274803161621, "learning_rate": 9.522262413469505e-06, "loss": 1.0919, "step": 2155 }, { "epoch": 0.16620413197656492, "grad_norm": 3.7525219917297363, "learning_rate": 9.52172971688752e-06, "loss": 1.043, "step": 2156 }, { "epoch": 0.16628122109158186, "grad_norm": 4.160288333892822, "learning_rate": 9.521196738399107e-06, "loss": 1.0777, "step": 2157 }, { "epoch": 0.16635831020659883, "grad_norm": 4.212968826293945, "learning_rate": 9.520663478037493e-06, "loss": 0.9929, "step": 2158 }, { "epoch": 0.1664353993216158, "grad_norm": 4.7758588790893555, "learning_rate": 9.520129935835924e-06, "loss": 1.0723, "step": 2159 }, { "epoch": 0.16651248843663274, "grad_norm": 4.326691150665283, "learning_rate": 9.519596111827665e-06, "loss": 0.9742, "step": 2160 }, { "epoch": 0.16658957755164971, "grad_norm": 3.9504857063293457, "learning_rate": 9.519062006045995e-06, "loss": 1.0658, "step": 2161 }, { "epoch": 0.16666666666666666, "grad_norm": 3.9129183292388916, "learning_rate": 9.518527618524213e-06, "loss": 1.1364, "step": 2162 }, { "epoch": 0.16674375578168363, "grad_norm": 3.5338165760040283, "learning_rate": 9.517992949295637e-06, "loss": 1.0188, "step": 2163 }, { "epoch": 0.1668208448967006, "grad_norm": 3.9790713787078857, "learning_rate": 9.5174579983936e-06, "loss": 1.0366, "step": 2164 }, { "epoch": 0.16689793401171754, "grad_norm": 3.752887725830078, "learning_rate": 9.516922765851453e-06, "loss": 1.0697, "step": 2165 }, { "epoch": 0.1669750231267345, "grad_norm": 4.03433895111084, "learning_rate": 9.516387251702566e-06, "loss": 1.1041, "step": 2166 }, { "epoch": 0.16705211224175145, "grad_norm": 4.188197612762451, "learning_rate": 9.515851455980325e-06, "loss": 1.0372, "step": 2167 }, { "epoch": 0.16712920135676843, "grad_norm": 3.5528385639190674, "learning_rate": 9.515315378718132e-06, "loss": 1.021, "step": 2168 }, { "epoch": 0.1672062904717854, "grad_norm": 4.173407077789307, "learning_rate": 9.514779019949411e-06, "loss": 1.0054, "step": 2169 }, { "epoch": 0.16728337958680234, "grad_norm": 3.9701037406921387, "learning_rate": 9.514242379707604e-06, "loss": 1.0418, "step": 2170 }, { "epoch": 0.1673604687018193, "grad_norm": 4.371380805969238, "learning_rate": 9.513705458026162e-06, "loss": 1.1153, "step": 2171 }, { "epoch": 0.16743755781683625, "grad_norm": 4.344913482666016, "learning_rate": 9.51316825493856e-06, "loss": 1.0405, "step": 2172 }, { "epoch": 0.16751464693185322, "grad_norm": 4.681027412414551, "learning_rate": 
9.512630770478292e-06, "loss": 1.0118, "step": 2173 }, { "epoch": 0.1675917360468702, "grad_norm": 4.137608528137207, "learning_rate": 9.51209300467887e-06, "loss": 1.1043, "step": 2174 }, { "epoch": 0.16766882516188714, "grad_norm": 3.731459379196167, "learning_rate": 9.511554957573816e-06, "loss": 0.901, "step": 2175 }, { "epoch": 0.1677459142769041, "grad_norm": 3.322524070739746, "learning_rate": 9.511016629196676e-06, "loss": 0.9328, "step": 2176 }, { "epoch": 0.16782300339192105, "grad_norm": 3.5919430255889893, "learning_rate": 9.510478019581013e-06, "loss": 0.9859, "step": 2177 }, { "epoch": 0.16790009250693802, "grad_norm": 3.575169086456299, "learning_rate": 9.509939128760406e-06, "loss": 1.0423, "step": 2178 }, { "epoch": 0.167977181621955, "grad_norm": 4.213058948516846, "learning_rate": 9.509399956768452e-06, "loss": 1.0613, "step": 2179 }, { "epoch": 0.16805427073697193, "grad_norm": 3.759490966796875, "learning_rate": 9.508860503638765e-06, "loss": 1.0447, "step": 2180 }, { "epoch": 0.1681313598519889, "grad_norm": 4.783963203430176, "learning_rate": 9.508320769404978e-06, "loss": 1.0838, "step": 2181 }, { "epoch": 0.16820844896700585, "grad_norm": 3.997779369354248, "learning_rate": 9.507780754100739e-06, "loss": 1.156, "step": 2182 }, { "epoch": 0.16828553808202282, "grad_norm": 3.6696715354919434, "learning_rate": 9.507240457759717e-06, "loss": 1.0282, "step": 2183 }, { "epoch": 0.1683626271970398, "grad_norm": 3.8719160556793213, "learning_rate": 9.506699880415597e-06, "loss": 1.0963, "step": 2184 }, { "epoch": 0.16843971631205673, "grad_norm": 3.7864603996276855, "learning_rate": 9.50615902210208e-06, "loss": 1.0538, "step": 2185 }, { "epoch": 0.1685168054270737, "grad_norm": 4.357522964477539, "learning_rate": 9.505617882852884e-06, "loss": 1.0002, "step": 2186 }, { "epoch": 0.16859389454209064, "grad_norm": 4.590842247009277, "learning_rate": 9.505076462701752e-06, "loss": 1.0613, "step": 2187 }, { "epoch": 0.16867098365710761, "grad_norm": 5.570279121398926, "learning_rate": 9.504534761682431e-06, "loss": 1.0677, "step": 2188 }, { "epoch": 0.16874807277212459, "grad_norm": 3.9890170097351074, "learning_rate": 9.503992779828698e-06, "loss": 0.9587, "step": 2189 }, { "epoch": 0.16882516188714153, "grad_norm": 3.743421792984009, "learning_rate": 9.503450517174344e-06, "loss": 1.1186, "step": 2190 }, { "epoch": 0.1689022510021585, "grad_norm": 4.146291255950928, "learning_rate": 9.502907973753173e-06, "loss": 1.1364, "step": 2191 }, { "epoch": 0.16897934011717544, "grad_norm": 4.190448760986328, "learning_rate": 9.50236514959901e-06, "loss": 1.1372, "step": 2192 }, { "epoch": 0.1690564292321924, "grad_norm": 4.3127641677856445, "learning_rate": 9.501822044745701e-06, "loss": 1.1888, "step": 2193 }, { "epoch": 0.16913351834720938, "grad_norm": 3.6845946311950684, "learning_rate": 9.501278659227101e-06, "loss": 0.9711, "step": 2194 }, { "epoch": 0.16921060746222633, "grad_norm": 3.9466843605041504, "learning_rate": 9.500734993077089e-06, "loss": 1.0328, "step": 2195 }, { "epoch": 0.1692876965772433, "grad_norm": 4.372476577758789, "learning_rate": 9.500191046329561e-06, "loss": 1.0785, "step": 2196 }, { "epoch": 0.16936478569226024, "grad_norm": 3.853376865386963, "learning_rate": 9.499646819018429e-06, "loss": 1.0748, "step": 2197 }, { "epoch": 0.1694418748072772, "grad_norm": 3.7514140605926514, "learning_rate": 9.499102311177622e-06, "loss": 1.1136, "step": 2198 }, { "epoch": 0.16951896392229418, "grad_norm": 3.941514730453491, "learning_rate": 9.498557522841085e-06, 
"loss": 1.2299, "step": 2199 }, { "epoch": 0.16959605303731112, "grad_norm": 4.029524326324463, "learning_rate": 9.498012454042786e-06, "loss": 1.0973, "step": 2200 }, { "epoch": 0.1696731421523281, "grad_norm": 4.247512340545654, "learning_rate": 9.497467104816709e-06, "loss": 1.1146, "step": 2201 }, { "epoch": 0.16975023126734506, "grad_norm": 3.7475624084472656, "learning_rate": 9.496921475196847e-06, "loss": 1.0355, "step": 2202 }, { "epoch": 0.169827320382362, "grad_norm": 3.81801438331604, "learning_rate": 9.496375565217225e-06, "loss": 1.1312, "step": 2203 }, { "epoch": 0.16990440949737898, "grad_norm": 4.464087963104248, "learning_rate": 9.49582937491187e-06, "loss": 1.1643, "step": 2204 }, { "epoch": 0.16998149861239592, "grad_norm": 3.895153522491455, "learning_rate": 9.49528290431484e-06, "loss": 1.1044, "step": 2205 }, { "epoch": 0.1700585877274129, "grad_norm": 4.139657974243164, "learning_rate": 9.494736153460204e-06, "loss": 1.1443, "step": 2206 }, { "epoch": 0.17013567684242986, "grad_norm": 3.7436201572418213, "learning_rate": 9.494189122382046e-06, "loss": 1.0922, "step": 2207 }, { "epoch": 0.1702127659574468, "grad_norm": 4.397914409637451, "learning_rate": 9.493641811114472e-06, "loss": 1.1507, "step": 2208 }, { "epoch": 0.17028985507246377, "grad_norm": 3.714843511581421, "learning_rate": 9.493094219691606e-06, "loss": 1.0862, "step": 2209 }, { "epoch": 0.17036694418748072, "grad_norm": 3.666947603225708, "learning_rate": 9.492546348147584e-06, "loss": 1.0753, "step": 2210 }, { "epoch": 0.1704440333024977, "grad_norm": 4.164958953857422, "learning_rate": 9.491998196516564e-06, "loss": 1.133, "step": 2211 }, { "epoch": 0.17052112241751466, "grad_norm": 4.119793891906738, "learning_rate": 9.491449764832722e-06, "loss": 1.0534, "step": 2212 }, { "epoch": 0.1705982115325316, "grad_norm": 3.694411277770996, "learning_rate": 9.49090105313025e-06, "loss": 1.0354, "step": 2213 }, { "epoch": 0.17067530064754857, "grad_norm": 4.022437572479248, "learning_rate": 9.490352061443355e-06, "loss": 1.0719, "step": 2214 }, { "epoch": 0.17075238976256552, "grad_norm": 4.365479946136475, "learning_rate": 9.489802789806265e-06, "loss": 1.2675, "step": 2215 }, { "epoch": 0.17082947887758249, "grad_norm": 4.730959892272949, "learning_rate": 9.489253238253225e-06, "loss": 1.1868, "step": 2216 }, { "epoch": 0.17090656799259946, "grad_norm": 3.6116209030151367, "learning_rate": 9.488703406818496e-06, "loss": 1.0207, "step": 2217 }, { "epoch": 0.1709836571076164, "grad_norm": 4.13337516784668, "learning_rate": 9.488153295536358e-06, "loss": 1.2437, "step": 2218 }, { "epoch": 0.17106074622263337, "grad_norm": 4.369235038757324, "learning_rate": 9.487602904441106e-06, "loss": 1.1526, "step": 2219 }, { "epoch": 0.1711378353376503, "grad_norm": 3.9253830909729004, "learning_rate": 9.487052233567055e-06, "loss": 1.0983, "step": 2220 }, { "epoch": 0.17121492445266728, "grad_norm": 3.9150092601776123, "learning_rate": 9.486501282948535e-06, "loss": 1.0501, "step": 2221 }, { "epoch": 0.17129201356768425, "grad_norm": 4.1889424324035645, "learning_rate": 9.485950052619898e-06, "loss": 1.0532, "step": 2222 }, { "epoch": 0.1713691026827012, "grad_norm": 4.013206481933594, "learning_rate": 9.485398542615507e-06, "loss": 1.0343, "step": 2223 }, { "epoch": 0.17144619179771817, "grad_norm": 3.747413158416748, "learning_rate": 9.484846752969747e-06, "loss": 0.9813, "step": 2224 }, { "epoch": 0.1715232809127351, "grad_norm": 4.145966053009033, "learning_rate": 9.484294683717021e-06, "loss": 1.047, "step": 2225 
}, { "epoch": 0.17160037002775208, "grad_norm": 3.959221124649048, "learning_rate": 9.483742334891747e-06, "loss": 0.988, "step": 2226 }, { "epoch": 0.17167745914276905, "grad_norm": 4.141855239868164, "learning_rate": 9.483189706528358e-06, "loss": 1.0136, "step": 2227 }, { "epoch": 0.171754548257786, "grad_norm": 4.037559509277344, "learning_rate": 9.482636798661311e-06, "loss": 1.003, "step": 2228 }, { "epoch": 0.17183163737280296, "grad_norm": 3.8605642318725586, "learning_rate": 9.482083611325076e-06, "loss": 1.105, "step": 2229 }, { "epoch": 0.1719087264878199, "grad_norm": 4.074838161468506, "learning_rate": 9.48153014455414e-06, "loss": 1.073, "step": 2230 }, { "epoch": 0.17198581560283688, "grad_norm": 3.978645086288452, "learning_rate": 9.480976398383013e-06, "loss": 1.1489, "step": 2231 }, { "epoch": 0.17206290471785385, "grad_norm": 4.13065242767334, "learning_rate": 9.480422372846212e-06, "loss": 1.1217, "step": 2232 }, { "epoch": 0.1721399938328708, "grad_norm": 4.274265766143799, "learning_rate": 9.479868067978282e-06, "loss": 1.0354, "step": 2233 }, { "epoch": 0.17221708294788776, "grad_norm": 3.908271312713623, "learning_rate": 9.47931348381378e-06, "loss": 1.0812, "step": 2234 }, { "epoch": 0.1722941720629047, "grad_norm": 4.128848075866699, "learning_rate": 9.478758620387281e-06, "loss": 1.0114, "step": 2235 }, { "epoch": 0.17237126117792168, "grad_norm": 4.402393341064453, "learning_rate": 9.478203477733377e-06, "loss": 1.1512, "step": 2236 }, { "epoch": 0.17244835029293865, "grad_norm": 4.182422161102295, "learning_rate": 9.477648055886682e-06, "loss": 1.0967, "step": 2237 }, { "epoch": 0.1725254394079556, "grad_norm": 3.684023857116699, "learning_rate": 9.477092354881818e-06, "loss": 1.0843, "step": 2238 }, { "epoch": 0.17260252852297256, "grad_norm": 4.2031660079956055, "learning_rate": 9.476536374753434e-06, "loss": 1.1126, "step": 2239 }, { "epoch": 0.1726796176379895, "grad_norm": 3.8642640113830566, "learning_rate": 9.475980115536193e-06, "loss": 1.0219, "step": 2240 }, { "epoch": 0.17275670675300647, "grad_norm": 4.426207542419434, "learning_rate": 9.475423577264772e-06, "loss": 1.1832, "step": 2241 }, { "epoch": 0.17283379586802344, "grad_norm": 3.9104762077331543, "learning_rate": 9.474866759973871e-06, "loss": 1.0344, "step": 2242 }, { "epoch": 0.17291088498304039, "grad_norm": 4.044737815856934, "learning_rate": 9.474309663698202e-06, "loss": 1.0671, "step": 2243 }, { "epoch": 0.17298797409805736, "grad_norm": 4.002004623413086, "learning_rate": 9.473752288472499e-06, "loss": 1.0373, "step": 2244 }, { "epoch": 0.17306506321307433, "grad_norm": 3.7214102745056152, "learning_rate": 9.473194634331512e-06, "loss": 1.0278, "step": 2245 }, { "epoch": 0.17314215232809127, "grad_norm": 3.870584487915039, "learning_rate": 9.472636701310005e-06, "loss": 0.9826, "step": 2246 }, { "epoch": 0.17321924144310824, "grad_norm": 3.6483523845672607, "learning_rate": 9.472078489442766e-06, "loss": 1.086, "step": 2247 }, { "epoch": 0.17329633055812518, "grad_norm": 3.894209384918213, "learning_rate": 9.471519998764593e-06, "loss": 1.0286, "step": 2248 }, { "epoch": 0.17337341967314215, "grad_norm": 3.856490135192871, "learning_rate": 9.470961229310307e-06, "loss": 1.1207, "step": 2249 }, { "epoch": 0.17345050878815912, "grad_norm": 5.136345386505127, "learning_rate": 9.470402181114747e-06, "loss": 1.0207, "step": 2250 }, { "epoch": 0.17352759790317607, "grad_norm": 4.148111343383789, "learning_rate": 9.46984285421276e-06, "loss": 1.1127, "step": 2251 }, { "epoch": 
0.17360468701819304, "grad_norm": 3.8354268074035645, "learning_rate": 9.469283248639223e-06, "loss": 1.0683, "step": 2252 }, { "epoch": 0.17368177613320998, "grad_norm": 3.8218307495117188, "learning_rate": 9.468723364429021e-06, "loss": 1.0874, "step": 2253 }, { "epoch": 0.17375886524822695, "grad_norm": 4.011178016662598, "learning_rate": 9.468163201617063e-06, "loss": 1.0788, "step": 2254 }, { "epoch": 0.17383595436324392, "grad_norm": 3.5386111736297607, "learning_rate": 9.467602760238268e-06, "loss": 0.8796, "step": 2255 }, { "epoch": 0.17391304347826086, "grad_norm": 3.82024884223938, "learning_rate": 9.467042040327582e-06, "loss": 1.0407, "step": 2256 }, { "epoch": 0.17399013259327784, "grad_norm": 4.042757987976074, "learning_rate": 9.46648104191996e-06, "loss": 1.1627, "step": 2257 }, { "epoch": 0.17406722170829478, "grad_norm": 3.925922393798828, "learning_rate": 9.465919765050375e-06, "loss": 1.1055, "step": 2258 }, { "epoch": 0.17414431082331175, "grad_norm": 3.940410852432251, "learning_rate": 9.465358209753824e-06, "loss": 0.9985, "step": 2259 }, { "epoch": 0.17422139993832872, "grad_norm": 3.904409646987915, "learning_rate": 9.464796376065316e-06, "loss": 1.0892, "step": 2260 }, { "epoch": 0.17429848905334566, "grad_norm": 3.91276216506958, "learning_rate": 9.464234264019876e-06, "loss": 1.0863, "step": 2261 }, { "epoch": 0.17437557816836263, "grad_norm": 3.8074424266815186, "learning_rate": 9.463671873652551e-06, "loss": 1.0709, "step": 2262 }, { "epoch": 0.17445266728337958, "grad_norm": 4.295366287231445, "learning_rate": 9.463109204998405e-06, "loss": 1.0679, "step": 2263 }, { "epoch": 0.17452975639839655, "grad_norm": 3.8966548442840576, "learning_rate": 9.462546258092512e-06, "loss": 1.0652, "step": 2264 }, { "epoch": 0.17460684551341352, "grad_norm": 3.9106335639953613, "learning_rate": 9.461983032969972e-06, "loss": 1.0031, "step": 2265 }, { "epoch": 0.17468393462843046, "grad_norm": 4.223884582519531, "learning_rate": 9.4614195296659e-06, "loss": 1.1193, "step": 2266 }, { "epoch": 0.17476102374344743, "grad_norm": 3.972520351409912, "learning_rate": 9.460855748215427e-06, "loss": 1.1208, "step": 2267 }, { "epoch": 0.17483811285846437, "grad_norm": 3.772442102432251, "learning_rate": 9.460291688653702e-06, "loss": 1.0707, "step": 2268 }, { "epoch": 0.17491520197348134, "grad_norm": 4.243513584136963, "learning_rate": 9.45972735101589e-06, "loss": 1.0806, "step": 2269 }, { "epoch": 0.17499229108849831, "grad_norm": 3.9305193424224854, "learning_rate": 9.459162735337176e-06, "loss": 1.0473, "step": 2270 }, { "epoch": 0.17506938020351526, "grad_norm": 3.5326061248779297, "learning_rate": 9.458597841652758e-06, "loss": 1.0894, "step": 2271 }, { "epoch": 0.17514646931853223, "grad_norm": 4.2558746337890625, "learning_rate": 9.458032669997858e-06, "loss": 1.1245, "step": 2272 }, { "epoch": 0.17522355843354917, "grad_norm": 3.9435577392578125, "learning_rate": 9.457467220407707e-06, "loss": 1.0961, "step": 2273 }, { "epoch": 0.17530064754856614, "grad_norm": 4.034180641174316, "learning_rate": 9.456901492917564e-06, "loss": 1.0136, "step": 2274 }, { "epoch": 0.1753777366635831, "grad_norm": 4.0210676193237305, "learning_rate": 9.456335487562695e-06, "loss": 1.058, "step": 2275 }, { "epoch": 0.17545482577860005, "grad_norm": 3.803864002227783, "learning_rate": 9.455769204378388e-06, "loss": 0.9875, "step": 2276 }, { "epoch": 0.17553191489361702, "grad_norm": 3.870218515396118, "learning_rate": 9.455202643399948e-06, "loss": 1.0243, "step": 2277 }, { "epoch": 
0.17560900400863397, "grad_norm": 3.751405954360962, "learning_rate": 9.454635804662697e-06, "loss": 1.0363, "step": 2278 }, { "epoch": 0.17568609312365094, "grad_norm": 3.922459125518799, "learning_rate": 9.454068688201975e-06, "loss": 1.0977, "step": 2279 }, { "epoch": 0.1757631822386679, "grad_norm": 4.521456241607666, "learning_rate": 9.453501294053139e-06, "loss": 1.1405, "step": 2280 }, { "epoch": 0.17584027135368485, "grad_norm": 4.3583478927612305, "learning_rate": 9.452933622251561e-06, "loss": 1.1479, "step": 2281 }, { "epoch": 0.17591736046870182, "grad_norm": 3.9341979026794434, "learning_rate": 9.452365672832635e-06, "loss": 1.0467, "step": 2282 }, { "epoch": 0.1759944495837188, "grad_norm": 4.151937484741211, "learning_rate": 9.45179744583177e-06, "loss": 1.0061, "step": 2283 }, { "epoch": 0.17607153869873574, "grad_norm": 3.7211530208587646, "learning_rate": 9.451228941284389e-06, "loss": 1.026, "step": 2284 }, { "epoch": 0.1761486278137527, "grad_norm": 4.126251697540283, "learning_rate": 9.450660159225938e-06, "loss": 1.055, "step": 2285 }, { "epoch": 0.17622571692876965, "grad_norm": 3.7324936389923096, "learning_rate": 9.450091099691876e-06, "loss": 1.0071, "step": 2286 }, { "epoch": 0.17630280604378662, "grad_norm": 3.911670446395874, "learning_rate": 9.449521762717682e-06, "loss": 1.0975, "step": 2287 }, { "epoch": 0.1763798951588036, "grad_norm": 3.6570870876312256, "learning_rate": 9.44895214833885e-06, "loss": 0.911, "step": 2288 }, { "epoch": 0.17645698427382053, "grad_norm": 3.5680623054504395, "learning_rate": 9.448382256590893e-06, "loss": 0.9641, "step": 2289 }, { "epoch": 0.1765340733888375, "grad_norm": 3.8727200031280518, "learning_rate": 9.447812087509343e-06, "loss": 1.1022, "step": 2290 }, { "epoch": 0.17661116250385445, "grad_norm": 3.9284706115722656, "learning_rate": 9.447241641129743e-06, "loss": 1.1042, "step": 2291 }, { "epoch": 0.17668825161887142, "grad_norm": 4.463204383850098, "learning_rate": 9.446670917487662e-06, "loss": 1.1087, "step": 2292 }, { "epoch": 0.1767653407338884, "grad_norm": 3.742551803588867, "learning_rate": 9.446099916618676e-06, "loss": 1.143, "step": 2293 }, { "epoch": 0.17684242984890533, "grad_norm": 3.764596462249756, "learning_rate": 9.445528638558389e-06, "loss": 1.0978, "step": 2294 }, { "epoch": 0.1769195189639223, "grad_norm": 3.68927001953125, "learning_rate": 9.444957083342415e-06, "loss": 0.8989, "step": 2295 }, { "epoch": 0.17699660807893924, "grad_norm": 3.6712417602539062, "learning_rate": 9.444385251006389e-06, "loss": 0.9342, "step": 2296 }, { "epoch": 0.17707369719395621, "grad_norm": 4.3771467208862305, "learning_rate": 9.44381314158596e-06, "loss": 1.1587, "step": 2297 }, { "epoch": 0.17715078630897318, "grad_norm": 3.71481990814209, "learning_rate": 9.443240755116797e-06, "loss": 1.0263, "step": 2298 }, { "epoch": 0.17722787542399013, "grad_norm": 3.5364553928375244, "learning_rate": 9.442668091634583e-06, "loss": 0.9791, "step": 2299 }, { "epoch": 0.1773049645390071, "grad_norm": 4.014560699462891, "learning_rate": 9.442095151175024e-06, "loss": 0.9919, "step": 2300 }, { "epoch": 0.17738205365402404, "grad_norm": 4.145420074462891, "learning_rate": 9.441521933773839e-06, "loss": 1.047, "step": 2301 }, { "epoch": 0.177459142769041, "grad_norm": 4.094470977783203, "learning_rate": 9.440948439466764e-06, "loss": 1.095, "step": 2302 }, { "epoch": 0.17753623188405798, "grad_norm": 3.7132604122161865, "learning_rate": 9.440374668289553e-06, "loss": 1.0473, "step": 2303 }, { "epoch": 0.17761332099907493, 
"grad_norm": 3.507946491241455, "learning_rate": 9.439800620277981e-06, "loss": 1.0164, "step": 2304 }, { "epoch": 0.1776904101140919, "grad_norm": 3.750944137573242, "learning_rate": 9.439226295467834e-06, "loss": 1.048, "step": 2305 }, { "epoch": 0.17776749922910884, "grad_norm": 3.6753499507904053, "learning_rate": 9.438651693894919e-06, "loss": 1.0394, "step": 2306 }, { "epoch": 0.1778445883441258, "grad_norm": 4.113043785095215, "learning_rate": 9.438076815595058e-06, "loss": 0.9472, "step": 2307 }, { "epoch": 0.17792167745914278, "grad_norm": 4.260312080383301, "learning_rate": 9.437501660604094e-06, "loss": 1.1437, "step": 2308 }, { "epoch": 0.17799876657415972, "grad_norm": 3.7821967601776123, "learning_rate": 9.436926228957883e-06, "loss": 1.0623, "step": 2309 }, { "epoch": 0.1780758556891767, "grad_norm": 3.920973062515259, "learning_rate": 9.436350520692303e-06, "loss": 1.0917, "step": 2310 }, { "epoch": 0.17815294480419364, "grad_norm": 3.868009567260742, "learning_rate": 9.435774535843243e-06, "loss": 1.0319, "step": 2311 }, { "epoch": 0.1782300339192106, "grad_norm": 3.780529022216797, "learning_rate": 9.435198274446614e-06, "loss": 0.9861, "step": 2312 }, { "epoch": 0.17830712303422758, "grad_norm": 3.820247173309326, "learning_rate": 9.434621736538343e-06, "loss": 1.1113, "step": 2313 }, { "epoch": 0.17838421214924452, "grad_norm": 4.4522705078125, "learning_rate": 9.434044922154375e-06, "loss": 1.1872, "step": 2314 }, { "epoch": 0.1784613012642615, "grad_norm": 4.132708549499512, "learning_rate": 9.433467831330671e-06, "loss": 1.0276, "step": 2315 }, { "epoch": 0.17853839037927843, "grad_norm": 3.8560283184051514, "learning_rate": 9.432890464103208e-06, "loss": 1.077, "step": 2316 }, { "epoch": 0.1786154794942954, "grad_norm": 3.8112268447875977, "learning_rate": 9.432312820507982e-06, "loss": 1.059, "step": 2317 }, { "epoch": 0.17869256860931237, "grad_norm": 4.3246259689331055, "learning_rate": 9.431734900581011e-06, "loss": 1.2361, "step": 2318 }, { "epoch": 0.17876965772432932, "grad_norm": 4.359192371368408, "learning_rate": 9.431156704358318e-06, "loss": 1.0469, "step": 2319 }, { "epoch": 0.1788467468393463, "grad_norm": 3.730458974838257, "learning_rate": 9.430578231875956e-06, "loss": 0.9637, "step": 2320 }, { "epoch": 0.17892383595436323, "grad_norm": 4.066617965698242, "learning_rate": 9.429999483169987e-06, "loss": 0.956, "step": 2321 }, { "epoch": 0.1790009250693802, "grad_norm": 3.660848379135132, "learning_rate": 9.429420458276495e-06, "loss": 1.0173, "step": 2322 }, { "epoch": 0.17907801418439717, "grad_norm": 4.498156547546387, "learning_rate": 9.428841157231576e-06, "loss": 1.0781, "step": 2323 }, { "epoch": 0.17915510329941411, "grad_norm": 3.933738946914673, "learning_rate": 9.42826158007135e-06, "loss": 1.1102, "step": 2324 }, { "epoch": 0.17923219241443109, "grad_norm": 3.9478960037231445, "learning_rate": 9.427681726831948e-06, "loss": 1.0797, "step": 2325 }, { "epoch": 0.17930928152944806, "grad_norm": 4.165237903594971, "learning_rate": 9.427101597549522e-06, "loss": 1.0487, "step": 2326 }, { "epoch": 0.179386370644465, "grad_norm": 3.704395294189453, "learning_rate": 9.426521192260239e-06, "loss": 1.0585, "step": 2327 }, { "epoch": 0.17946345975948197, "grad_norm": 4.005001544952393, "learning_rate": 9.425940511000286e-06, "loss": 1.0563, "step": 2328 }, { "epoch": 0.1795405488744989, "grad_norm": 3.9118220806121826, "learning_rate": 9.425359553805866e-06, "loss": 1.1235, "step": 2329 }, { "epoch": 0.17961763798951588, "grad_norm": 
4.479408264160156, "learning_rate": 9.424778320713196e-06, "loss": 1.1773, "step": 2330 }, { "epoch": 0.17969472710453285, "grad_norm": 3.661409854888916, "learning_rate": 9.424196811758515e-06, "loss": 0.9807, "step": 2331 }, { "epoch": 0.1797718162195498, "grad_norm": 3.719728946685791, "learning_rate": 9.423615026978076e-06, "loss": 1.1761, "step": 2332 }, { "epoch": 0.17984890533456677, "grad_norm": 4.085165023803711, "learning_rate": 9.42303296640815e-06, "loss": 1.0771, "step": 2333 }, { "epoch": 0.1799259944495837, "grad_norm": 3.7032485008239746, "learning_rate": 9.422450630085026e-06, "loss": 1.1194, "step": 2334 }, { "epoch": 0.18000308356460068, "grad_norm": 3.613755941390991, "learning_rate": 9.42186801804501e-06, "loss": 1.009, "step": 2335 }, { "epoch": 0.18008017267961765, "grad_norm": 3.7317869663238525, "learning_rate": 9.421285130324425e-06, "loss": 1.0114, "step": 2336 }, { "epoch": 0.1801572617946346, "grad_norm": 3.6703238487243652, "learning_rate": 9.42070196695961e-06, "loss": 0.9959, "step": 2337 }, { "epoch": 0.18023435090965156, "grad_norm": 3.9614789485931396, "learning_rate": 9.420118527986923e-06, "loss": 0.935, "step": 2338 }, { "epoch": 0.1803114400246685, "grad_norm": 4.142663478851318, "learning_rate": 9.41953481344274e-06, "loss": 1.1967, "step": 2339 }, { "epoch": 0.18038852913968548, "grad_norm": 3.7918946743011475, "learning_rate": 9.418950823363446e-06, "loss": 1.0058, "step": 2340 }, { "epoch": 0.18046561825470245, "grad_norm": 4.344407558441162, "learning_rate": 9.41836655778546e-06, "loss": 0.9986, "step": 2341 }, { "epoch": 0.1805427073697194, "grad_norm": 3.8436927795410156, "learning_rate": 9.417782016745198e-06, "loss": 1.1001, "step": 2342 }, { "epoch": 0.18061979648473636, "grad_norm": 4.281997203826904, "learning_rate": 9.41719720027911e-06, "loss": 1.1118, "step": 2343 }, { "epoch": 0.1806968855997533, "grad_norm": 4.253276824951172, "learning_rate": 9.416612108423653e-06, "loss": 1.0024, "step": 2344 }, { "epoch": 0.18077397471477027, "grad_norm": 3.944852590560913, "learning_rate": 9.416026741215305e-06, "loss": 1.0365, "step": 2345 }, { "epoch": 0.18085106382978725, "grad_norm": 3.773387908935547, "learning_rate": 9.415441098690562e-06, "loss": 0.9963, "step": 2346 }, { "epoch": 0.1809281529448042, "grad_norm": 3.8287198543548584, "learning_rate": 9.414855180885933e-06, "loss": 1.0586, "step": 2347 }, { "epoch": 0.18100524205982116, "grad_norm": 4.154395580291748, "learning_rate": 9.41426898783795e-06, "loss": 0.9962, "step": 2348 }, { "epoch": 0.1810823311748381, "grad_norm": 4.041691780090332, "learning_rate": 9.413682519583156e-06, "loss": 1.1823, "step": 2349 }, { "epoch": 0.18115942028985507, "grad_norm": 4.263957977294922, "learning_rate": 9.413095776158117e-06, "loss": 0.9897, "step": 2350 }, { "epoch": 0.18123650940487204, "grad_norm": 3.6769165992736816, "learning_rate": 9.412508757599413e-06, "loss": 1.0331, "step": 2351 }, { "epoch": 0.18131359851988899, "grad_norm": 3.703239679336548, "learning_rate": 9.411921463943641e-06, "loss": 0.9509, "step": 2352 }, { "epoch": 0.18139068763490596, "grad_norm": 3.7787482738494873, "learning_rate": 9.411333895227415e-06, "loss": 0.9942, "step": 2353 }, { "epoch": 0.1814677767499229, "grad_norm": 3.9517102241516113, "learning_rate": 9.410746051487367e-06, "loss": 1.0088, "step": 2354 }, { "epoch": 0.18154486586493987, "grad_norm": 3.5368359088897705, "learning_rate": 9.410157932760148e-06, "loss": 1.0106, "step": 2355 }, { "epoch": 0.18162195497995684, "grad_norm": 3.456052780151367, 
"learning_rate": 9.40956953908242e-06, "loss": 1.0061, "step": 2356 }, { "epoch": 0.18169904409497378, "grad_norm": 3.8283514976501465, "learning_rate": 9.408980870490872e-06, "loss": 1.0836, "step": 2357 }, { "epoch": 0.18177613320999075, "grad_norm": 3.708845853805542, "learning_rate": 9.4083919270222e-06, "loss": 0.9974, "step": 2358 }, { "epoch": 0.1818532223250077, "grad_norm": 3.6090118885040283, "learning_rate": 9.407802708713123e-06, "loss": 0.906, "step": 2359 }, { "epoch": 0.18193031144002467, "grad_norm": 4.260048866271973, "learning_rate": 9.407213215600377e-06, "loss": 1.1036, "step": 2360 }, { "epoch": 0.18200740055504164, "grad_norm": 4.0702056884765625, "learning_rate": 9.406623447720711e-06, "loss": 1.072, "step": 2361 }, { "epoch": 0.18208448967005858, "grad_norm": 4.009487152099609, "learning_rate": 9.406033405110896e-06, "loss": 1.0236, "step": 2362 }, { "epoch": 0.18216157878507555, "grad_norm": 4.346181869506836, "learning_rate": 9.405443087807717e-06, "loss": 0.978, "step": 2363 }, { "epoch": 0.1822386679000925, "grad_norm": 4.057311534881592, "learning_rate": 9.404852495847979e-06, "loss": 1.13, "step": 2364 }, { "epoch": 0.18231575701510946, "grad_norm": 4.186524391174316, "learning_rate": 9.4042616292685e-06, "loss": 1.0866, "step": 2365 }, { "epoch": 0.18239284613012643, "grad_norm": 3.631807565689087, "learning_rate": 9.40367048810612e-06, "loss": 0.9123, "step": 2366 }, { "epoch": 0.18246993524514338, "grad_norm": 3.841275930404663, "learning_rate": 9.403079072397692e-06, "loss": 1.19, "step": 2367 }, { "epoch": 0.18254702436016035, "grad_norm": 3.969886541366577, "learning_rate": 9.402487382180088e-06, "loss": 0.9525, "step": 2368 }, { "epoch": 0.18262411347517732, "grad_norm": 3.8512344360351562, "learning_rate": 9.401895417490197e-06, "loss": 0.9227, "step": 2369 }, { "epoch": 0.18270120259019426, "grad_norm": 3.7888450622558594, "learning_rate": 9.401303178364923e-06, "loss": 1.0298, "step": 2370 }, { "epoch": 0.18277829170521123, "grad_norm": 3.4874343872070312, "learning_rate": 9.400710664841192e-06, "loss": 0.997, "step": 2371 }, { "epoch": 0.18285538082022817, "grad_norm": 3.7443392276763916, "learning_rate": 9.400117876955943e-06, "loss": 1.0684, "step": 2372 }, { "epoch": 0.18293246993524515, "grad_norm": 4.328792572021484, "learning_rate": 9.399524814746133e-06, "loss": 1.1274, "step": 2373 }, { "epoch": 0.18300955905026212, "grad_norm": 3.9465620517730713, "learning_rate": 9.398931478248736e-06, "loss": 1.1876, "step": 2374 }, { "epoch": 0.18308664816527906, "grad_norm": 3.4830267429351807, "learning_rate": 9.398337867500744e-06, "loss": 0.9418, "step": 2375 }, { "epoch": 0.18316373728029603, "grad_norm": 3.6330952644348145, "learning_rate": 9.397743982539166e-06, "loss": 1.0558, "step": 2376 }, { "epoch": 0.18324082639531297, "grad_norm": 4.2686076164245605, "learning_rate": 9.397149823401029e-06, "loss": 1.0845, "step": 2377 }, { "epoch": 0.18331791551032994, "grad_norm": 3.876206636428833, "learning_rate": 9.396555390123371e-06, "loss": 1.0598, "step": 2378 }, { "epoch": 0.1833950046253469, "grad_norm": 4.2616472244262695, "learning_rate": 9.395960682743255e-06, "loss": 1.1524, "step": 2379 }, { "epoch": 0.18347209374036386, "grad_norm": 4.4118475914001465, "learning_rate": 9.39536570129776e-06, "loss": 1.2057, "step": 2380 }, { "epoch": 0.18354918285538083, "grad_norm": 3.960028648376465, "learning_rate": 9.394770445823976e-06, "loss": 1.0039, "step": 2381 }, { "epoch": 0.18362627197039777, "grad_norm": 3.7380380630493164, "learning_rate": 
9.394174916359016e-06, "loss": 1.0101, "step": 2382 }, { "epoch": 0.18370336108541474, "grad_norm": 4.420310020446777, "learning_rate": 9.393579112940007e-06, "loss": 0.9947, "step": 2383 }, { "epoch": 0.1837804502004317, "grad_norm": 3.7579898834228516, "learning_rate": 9.392983035604098e-06, "loss": 0.9812, "step": 2384 }, { "epoch": 0.18385753931544865, "grad_norm": 4.156567573547363, "learning_rate": 9.392386684388446e-06, "loss": 1.1538, "step": 2385 }, { "epoch": 0.18393462843046562, "grad_norm": 3.7567379474639893, "learning_rate": 9.391790059330234e-06, "loss": 1.0734, "step": 2386 }, { "epoch": 0.18401171754548257, "grad_norm": 4.441596031188965, "learning_rate": 9.391193160466658e-06, "loss": 1.009, "step": 2387 }, { "epoch": 0.18408880666049954, "grad_norm": 4.164433479309082, "learning_rate": 9.39059598783493e-06, "loss": 1.1177, "step": 2388 }, { "epoch": 0.1841658957755165, "grad_norm": 4.159829139709473, "learning_rate": 9.389998541472282e-06, "loss": 1.0572, "step": 2389 }, { "epoch": 0.18424298489053345, "grad_norm": 4.0909743309021, "learning_rate": 9.389400821415962e-06, "loss": 1.0564, "step": 2390 }, { "epoch": 0.18432007400555042, "grad_norm": 4.253016471862793, "learning_rate": 9.388802827703231e-06, "loss": 1.162, "step": 2391 }, { "epoch": 0.18439716312056736, "grad_norm": 4.08793306350708, "learning_rate": 9.388204560371377e-06, "loss": 1.1314, "step": 2392 }, { "epoch": 0.18447425223558434, "grad_norm": 3.5922398567199707, "learning_rate": 9.387606019457696e-06, "loss": 1.0718, "step": 2393 }, { "epoch": 0.1845513413506013, "grad_norm": 3.838916063308716, "learning_rate": 9.387007204999503e-06, "loss": 1.1038, "step": 2394 }, { "epoch": 0.18462843046561825, "grad_norm": 3.8841969966888428, "learning_rate": 9.386408117034131e-06, "loss": 0.9366, "step": 2395 }, { "epoch": 0.18470551958063522, "grad_norm": 4.599263668060303, "learning_rate": 9.385808755598932e-06, "loss": 1.0969, "step": 2396 }, { "epoch": 0.18478260869565216, "grad_norm": 3.7928740978240967, "learning_rate": 9.38520912073127e-06, "loss": 1.0684, "step": 2397 }, { "epoch": 0.18485969781066913, "grad_norm": 4.084198474884033, "learning_rate": 9.384609212468531e-06, "loss": 1.1246, "step": 2398 }, { "epoch": 0.1849367869256861, "grad_norm": 4.296706199645996, "learning_rate": 9.384009030848118e-06, "loss": 1.0905, "step": 2399 }, { "epoch": 0.18501387604070305, "grad_norm": 4.311187267303467, "learning_rate": 9.383408575907446e-06, "loss": 1.1047, "step": 2400 }, { "epoch": 0.18509096515572002, "grad_norm": 3.62276554107666, "learning_rate": 9.382807847683952e-06, "loss": 1.1747, "step": 2401 }, { "epoch": 0.18516805427073696, "grad_norm": 3.8191843032836914, "learning_rate": 9.382206846215087e-06, "loss": 1.052, "step": 2402 }, { "epoch": 0.18524514338575393, "grad_norm": 4.004209041595459, "learning_rate": 9.381605571538321e-06, "loss": 0.9742, "step": 2403 }, { "epoch": 0.1853222325007709, "grad_norm": 3.64969539642334, "learning_rate": 9.381004023691142e-06, "loss": 1.0338, "step": 2404 }, { "epoch": 0.18539932161578784, "grad_norm": 4.0959978103637695, "learning_rate": 9.38040220271105e-06, "loss": 1.0634, "step": 2405 }, { "epoch": 0.1854764107308048, "grad_norm": 4.057779788970947, "learning_rate": 9.37980010863557e-06, "loss": 1.0364, "step": 2406 }, { "epoch": 0.18555349984582176, "grad_norm": 3.817060708999634, "learning_rate": 9.379197741502236e-06, "loss": 1.0794, "step": 2407 }, { "epoch": 0.18563058896083873, "grad_norm": 4.1733880043029785, "learning_rate": 9.378595101348602e-06, 
"loss": 1.0477, "step": 2408 }, { "epoch": 0.1857076780758557, "grad_norm": 3.8468310832977295, "learning_rate": 9.377992188212241e-06, "loss": 0.9913, "step": 2409 }, { "epoch": 0.18578476719087264, "grad_norm": 4.041358470916748, "learning_rate": 9.377389002130741e-06, "loss": 1.1645, "step": 2410 }, { "epoch": 0.1858618563058896, "grad_norm": 3.796046733856201, "learning_rate": 9.376785543141709e-06, "loss": 1.1405, "step": 2411 }, { "epoch": 0.18593894542090658, "grad_norm": 3.9358856678009033, "learning_rate": 9.376181811282764e-06, "loss": 1.073, "step": 2412 }, { "epoch": 0.18601603453592352, "grad_norm": 4.8310956954956055, "learning_rate": 9.375577806591552e-06, "loss": 1.1654, "step": 2413 }, { "epoch": 0.1860931236509405, "grad_norm": 4.1156792640686035, "learning_rate": 9.374973529105722e-06, "loss": 0.9819, "step": 2414 }, { "epoch": 0.18617021276595744, "grad_norm": 4.034856796264648, "learning_rate": 9.374368978862952e-06, "loss": 1.0477, "step": 2415 }, { "epoch": 0.1862473018809744, "grad_norm": 4.216608047485352, "learning_rate": 9.373764155900932e-06, "loss": 1.1058, "step": 2416 }, { "epoch": 0.18632439099599138, "grad_norm": 3.979484796524048, "learning_rate": 9.373159060257368e-06, "loss": 0.9858, "step": 2417 }, { "epoch": 0.18640148011100832, "grad_norm": 3.889035940170288, "learning_rate": 9.372553691969987e-06, "loss": 0.9886, "step": 2418 }, { "epoch": 0.1864785692260253, "grad_norm": 3.900179862976074, "learning_rate": 9.37194805107653e-06, "loss": 0.919, "step": 2419 }, { "epoch": 0.18655565834104224, "grad_norm": 4.21008825302124, "learning_rate": 9.371342137614754e-06, "loss": 1.086, "step": 2420 }, { "epoch": 0.1866327474560592, "grad_norm": 3.7535738945007324, "learning_rate": 9.370735951622435e-06, "loss": 0.9342, "step": 2421 }, { "epoch": 0.18670983657107618, "grad_norm": 4.046445369720459, "learning_rate": 9.370129493137367e-06, "loss": 1.1538, "step": 2422 }, { "epoch": 0.18678692568609312, "grad_norm": 3.904557228088379, "learning_rate": 9.369522762197357e-06, "loss": 1.0304, "step": 2423 }, { "epoch": 0.1868640148011101, "grad_norm": 3.9920108318328857, "learning_rate": 9.368915758840235e-06, "loss": 1.0235, "step": 2424 }, { "epoch": 0.18694110391612703, "grad_norm": 4.578022480010986, "learning_rate": 9.36830848310384e-06, "loss": 1.0139, "step": 2425 }, { "epoch": 0.187018193031144, "grad_norm": 4.43154764175415, "learning_rate": 9.367700935026038e-06, "loss": 1.1063, "step": 2426 }, { "epoch": 0.18709528214616097, "grad_norm": 3.994178533554077, "learning_rate": 9.367093114644703e-06, "loss": 1.0885, "step": 2427 }, { "epoch": 0.18717237126117792, "grad_norm": 4.0110015869140625, "learning_rate": 9.366485021997728e-06, "loss": 1.05, "step": 2428 }, { "epoch": 0.1872494603761949, "grad_norm": 3.9679675102233887, "learning_rate": 9.36587665712303e-06, "loss": 1.0291, "step": 2429 }, { "epoch": 0.18732654949121183, "grad_norm": 4.2142558097839355, "learning_rate": 9.36526802005853e-06, "loss": 1.0634, "step": 2430 }, { "epoch": 0.1874036386062288, "grad_norm": 3.840346574783325, "learning_rate": 9.36465911084218e-06, "loss": 1.0937, "step": 2431 }, { "epoch": 0.18748072772124577, "grad_norm": 3.9532737731933594, "learning_rate": 9.36404992951194e-06, "loss": 1.1248, "step": 2432 }, { "epoch": 0.18755781683626271, "grad_norm": 3.815847873687744, "learning_rate": 9.363440476105787e-06, "loss": 0.9521, "step": 2433 }, { "epoch": 0.18763490595127968, "grad_norm": 4.12700080871582, "learning_rate": 9.36283075066172e-06, "loss": 1.0596, "step": 2434 }, { 
"epoch": 0.18771199506629663, "grad_norm": 4.737533092498779, "learning_rate": 9.36222075321775e-06, "loss": 1.1245, "step": 2435 }, { "epoch": 0.1877890841813136, "grad_norm": 3.9905295372009277, "learning_rate": 9.36161048381191e-06, "loss": 1.1027, "step": 2436 }, { "epoch": 0.18786617329633057, "grad_norm": 4.189011096954346, "learning_rate": 9.360999942482247e-06, "loss": 1.0894, "step": 2437 }, { "epoch": 0.1879432624113475, "grad_norm": 3.8845021724700928, "learning_rate": 9.360389129266822e-06, "loss": 1.0176, "step": 2438 }, { "epoch": 0.18802035152636448, "grad_norm": 4.049330234527588, "learning_rate": 9.359778044203718e-06, "loss": 0.8773, "step": 2439 }, { "epoch": 0.18809744064138142, "grad_norm": 3.8811943531036377, "learning_rate": 9.359166687331032e-06, "loss": 1.0447, "step": 2440 }, { "epoch": 0.1881745297563984, "grad_norm": 3.700079917907715, "learning_rate": 9.35855505868688e-06, "loss": 0.9359, "step": 2441 }, { "epoch": 0.18825161887141537, "grad_norm": 4.782196521759033, "learning_rate": 9.357943158309396e-06, "loss": 1.1038, "step": 2442 }, { "epoch": 0.1883287079864323, "grad_norm": 3.7085964679718018, "learning_rate": 9.357330986236723e-06, "loss": 1.0557, "step": 2443 }, { "epoch": 0.18840579710144928, "grad_norm": 4.303460597991943, "learning_rate": 9.356718542507032e-06, "loss": 1.0125, "step": 2444 }, { "epoch": 0.18848288621646622, "grad_norm": 3.9245944023132324, "learning_rate": 9.356105827158505e-06, "loss": 0.9875, "step": 2445 }, { "epoch": 0.1885599753314832, "grad_norm": 4.061418533325195, "learning_rate": 9.355492840229338e-06, "loss": 1.0002, "step": 2446 }, { "epoch": 0.18863706444650016, "grad_norm": 4.799796104431152, "learning_rate": 9.354879581757753e-06, "loss": 1.0555, "step": 2447 }, { "epoch": 0.1887141535615171, "grad_norm": 3.832697629928589, "learning_rate": 9.354266051781978e-06, "loss": 1.1255, "step": 2448 }, { "epoch": 0.18879124267653408, "grad_norm": 4.201719284057617, "learning_rate": 9.353652250340268e-06, "loss": 0.9662, "step": 2449 }, { "epoch": 0.18886833179155102, "grad_norm": 3.8492846488952637, "learning_rate": 9.353038177470886e-06, "loss": 1.0201, "step": 2450 }, { "epoch": 0.188945420906568, "grad_norm": 4.532588005065918, "learning_rate": 9.35242383321212e-06, "loss": 1.0201, "step": 2451 }, { "epoch": 0.18902251002158496, "grad_norm": 4.5830206871032715, "learning_rate": 9.35180921760227e-06, "loss": 1.0741, "step": 2452 }, { "epoch": 0.1890995991366019, "grad_norm": 4.1921868324279785, "learning_rate": 9.351194330679653e-06, "loss": 1.0388, "step": 2453 }, { "epoch": 0.18917668825161887, "grad_norm": 3.923304796218872, "learning_rate": 9.350579172482607e-06, "loss": 1.0602, "step": 2454 }, { "epoch": 0.18925377736663584, "grad_norm": 4.064269065856934, "learning_rate": 9.349963743049479e-06, "loss": 1.1611, "step": 2455 }, { "epoch": 0.1893308664816528, "grad_norm": 4.29310417175293, "learning_rate": 9.349348042418643e-06, "loss": 0.9577, "step": 2456 }, { "epoch": 0.18940795559666976, "grad_norm": 3.83111572265625, "learning_rate": 9.348732070628482e-06, "loss": 1.0647, "step": 2457 }, { "epoch": 0.1894850447116867, "grad_norm": 4.015953063964844, "learning_rate": 9.348115827717398e-06, "loss": 1.0998, "step": 2458 }, { "epoch": 0.18956213382670367, "grad_norm": 5.061834335327148, "learning_rate": 9.347499313723811e-06, "loss": 1.0665, "step": 2459 }, { "epoch": 0.18963922294172064, "grad_norm": 3.875699043273926, "learning_rate": 9.346882528686159e-06, "loss": 1.0707, "step": 2460 }, { "epoch": 
0.18971631205673758, "grad_norm": 3.674454927444458, "learning_rate": 9.346265472642895e-06, "loss": 1.031, "step": 2461 }, { "epoch": 0.18979340117175456, "grad_norm": 3.886066198348999, "learning_rate": 9.34564814563249e-06, "loss": 1.0683, "step": 2462 }, { "epoch": 0.1898704902867715, "grad_norm": 3.8345463275909424, "learning_rate": 9.345030547693426e-06, "loss": 1.0606, "step": 2463 }, { "epoch": 0.18994757940178847, "grad_norm": 4.000880718231201, "learning_rate": 9.344412678864214e-06, "loss": 1.1364, "step": 2464 }, { "epoch": 0.19002466851680544, "grad_norm": 4.1079607009887695, "learning_rate": 9.34379453918337e-06, "loss": 1.1008, "step": 2465 }, { "epoch": 0.19010175763182238, "grad_norm": 3.764862537384033, "learning_rate": 9.343176128689434e-06, "loss": 0.9816, "step": 2466 }, { "epoch": 0.19017884674683935, "grad_norm": 4.217895984649658, "learning_rate": 9.342557447420961e-06, "loss": 0.9649, "step": 2467 }, { "epoch": 0.1902559358618563, "grad_norm": 4.198402404785156, "learning_rate": 9.34193849541652e-06, "loss": 1.0241, "step": 2468 }, { "epoch": 0.19033302497687327, "grad_norm": 3.5659427642822266, "learning_rate": 9.341319272714704e-06, "loss": 1.0963, "step": 2469 }, { "epoch": 0.19041011409189024, "grad_norm": 4.269014358520508, "learning_rate": 9.340699779354114e-06, "loss": 1.1575, "step": 2470 }, { "epoch": 0.19048720320690718, "grad_norm": 3.8241798877716064, "learning_rate": 9.340080015373374e-06, "loss": 1.0775, "step": 2471 }, { "epoch": 0.19056429232192415, "grad_norm": 4.083940505981445, "learning_rate": 9.339459980811122e-06, "loss": 1.0798, "step": 2472 }, { "epoch": 0.1906413814369411, "grad_norm": 3.9632468223571777, "learning_rate": 9.338839675706017e-06, "loss": 1.0339, "step": 2473 }, { "epoch": 0.19071847055195806, "grad_norm": 3.6100375652313232, "learning_rate": 9.338219100096728e-06, "loss": 1.0676, "step": 2474 }, { "epoch": 0.19079555966697503, "grad_norm": 3.7622032165527344, "learning_rate": 9.337598254021947e-06, "loss": 1.0779, "step": 2475 }, { "epoch": 0.19087264878199198, "grad_norm": 3.8564085960388184, "learning_rate": 9.33697713752038e-06, "loss": 1.0088, "step": 2476 }, { "epoch": 0.19094973789700895, "grad_norm": 4.029922962188721, "learning_rate": 9.33635575063075e-06, "loss": 1.1112, "step": 2477 }, { "epoch": 0.1910268270120259, "grad_norm": 4.0595502853393555, "learning_rate": 9.335734093391797e-06, "loss": 0.9914, "step": 2478 }, { "epoch": 0.19110391612704286, "grad_norm": 3.9760942459106445, "learning_rate": 9.335112165842277e-06, "loss": 1.0767, "step": 2479 }, { "epoch": 0.19118100524205983, "grad_norm": 4.477503776550293, "learning_rate": 9.334489968020968e-06, "loss": 1.0201, "step": 2480 }, { "epoch": 0.19125809435707677, "grad_norm": 4.229430198669434, "learning_rate": 9.333867499966659e-06, "loss": 1.0431, "step": 2481 }, { "epoch": 0.19133518347209374, "grad_norm": 3.821248769760132, "learning_rate": 9.333244761718157e-06, "loss": 1.0613, "step": 2482 }, { "epoch": 0.1914122725871107, "grad_norm": 3.8744101524353027, "learning_rate": 9.332621753314285e-06, "loss": 0.8911, "step": 2483 }, { "epoch": 0.19148936170212766, "grad_norm": 3.607072114944458, "learning_rate": 9.331998474793886e-06, "loss": 0.9713, "step": 2484 }, { "epoch": 0.19156645081714463, "grad_norm": 4.2475810050964355, "learning_rate": 9.331374926195819e-06, "loss": 1.076, "step": 2485 }, { "epoch": 0.19164353993216157, "grad_norm": 4.018882751464844, "learning_rate": 9.330751107558957e-06, "loss": 1.0054, "step": 2486 }, { "epoch": 
0.19172062904717854, "grad_norm": 3.6863954067230225, "learning_rate": 9.330127018922195e-06, "loss": 0.9955, "step": 2487 }, { "epoch": 0.19179771816219549, "grad_norm": 3.837610960006714, "learning_rate": 9.329502660324437e-06, "loss": 1.0766, "step": 2488 }, { "epoch": 0.19187480727721246, "grad_norm": 3.6833620071411133, "learning_rate": 9.328878031804613e-06, "loss": 1.0074, "step": 2489 }, { "epoch": 0.19195189639222943, "grad_norm": 3.849368095397949, "learning_rate": 9.328253133401663e-06, "loss": 1.0552, "step": 2490 }, { "epoch": 0.19202898550724637, "grad_norm": 4.261739730834961, "learning_rate": 9.327627965154547e-06, "loss": 1.1149, "step": 2491 }, { "epoch": 0.19210607462226334, "grad_norm": 5.0863938331604, "learning_rate": 9.32700252710224e-06, "loss": 1.1051, "step": 2492 }, { "epoch": 0.19218316373728028, "grad_norm": 3.6857082843780518, "learning_rate": 9.326376819283737e-06, "loss": 1.0603, "step": 2493 }, { "epoch": 0.19226025285229725, "grad_norm": 4.171791076660156, "learning_rate": 9.325750841738046e-06, "loss": 0.9976, "step": 2494 }, { "epoch": 0.19233734196731422, "grad_norm": 3.735818386077881, "learning_rate": 9.325124594504191e-06, "loss": 1.0204, "step": 2495 }, { "epoch": 0.19241443108233117, "grad_norm": 3.711371660232544, "learning_rate": 9.32449807762122e-06, "loss": 1.0821, "step": 2496 }, { "epoch": 0.19249152019734814, "grad_norm": 3.6877596378326416, "learning_rate": 9.323871291128192e-06, "loss": 0.9228, "step": 2497 }, { "epoch": 0.1925686093123651, "grad_norm": 4.088908672332764, "learning_rate": 9.323244235064182e-06, "loss": 1.0792, "step": 2498 }, { "epoch": 0.19264569842738205, "grad_norm": 3.9644157886505127, "learning_rate": 9.322616909468284e-06, "loss": 1.0787, "step": 2499 }, { "epoch": 0.19272278754239902, "grad_norm": 3.950051784515381, "learning_rate": 9.321989314379609e-06, "loss": 1.1276, "step": 2500 }, { "epoch": 0.19279987665741596, "grad_norm": 4.185604095458984, "learning_rate": 9.321361449837286e-06, "loss": 1.0614, "step": 2501 }, { "epoch": 0.19287696577243293, "grad_norm": 4.12734842300415, "learning_rate": 9.320733315880455e-06, "loss": 1.0167, "step": 2502 }, { "epoch": 0.1929540548874499, "grad_norm": 3.2868363857269287, "learning_rate": 9.320104912548281e-06, "loss": 0.9841, "step": 2503 }, { "epoch": 0.19303114400246685, "grad_norm": 3.8465094566345215, "learning_rate": 9.319476239879941e-06, "loss": 0.949, "step": 2504 }, { "epoch": 0.19310823311748382, "grad_norm": 3.994518995285034, "learning_rate": 9.318847297914627e-06, "loss": 1.0177, "step": 2505 }, { "epoch": 0.19318532223250076, "grad_norm": 4.2856268882751465, "learning_rate": 9.318218086691553e-06, "loss": 1.1933, "step": 2506 }, { "epoch": 0.19326241134751773, "grad_norm": 3.8407466411590576, "learning_rate": 9.317588606249945e-06, "loss": 1.0947, "step": 2507 }, { "epoch": 0.1933395004625347, "grad_norm": 3.695981979370117, "learning_rate": 9.316958856629049e-06, "loss": 1.0531, "step": 2508 }, { "epoch": 0.19341658957755165, "grad_norm": 4.270875453948975, "learning_rate": 9.316328837868126e-06, "loss": 0.8991, "step": 2509 }, { "epoch": 0.19349367869256862, "grad_norm": 4.000987529754639, "learning_rate": 9.315698550006456e-06, "loss": 1.0195, "step": 2510 }, { "epoch": 0.19357076780758556, "grad_norm": 3.681183099746704, "learning_rate": 9.315067993083331e-06, "loss": 1.0773, "step": 2511 }, { "epoch": 0.19364785692260253, "grad_norm": 5.025501728057861, "learning_rate": 9.314437167138065e-06, "loss": 1.1425, "step": 2512 }, { "epoch": 
0.1937249460376195, "grad_norm": 3.879424810409546, "learning_rate": 9.31380607220999e-06, "loss": 1.0285, "step": 2513 }, { "epoch": 0.19380203515263644, "grad_norm": 4.076712608337402, "learning_rate": 9.313174708338446e-06, "loss": 1.0066, "step": 2514 }, { "epoch": 0.1938791242676534, "grad_norm": 3.7475578784942627, "learning_rate": 9.312543075562797e-06, "loss": 1.019, "step": 2515 }, { "epoch": 0.19395621338267036, "grad_norm": 3.9618778228759766, "learning_rate": 9.311911173922423e-06, "loss": 1.0797, "step": 2516 }, { "epoch": 0.19403330249768733, "grad_norm": 4.08378267288208, "learning_rate": 9.311279003456719e-06, "loss": 1.0427, "step": 2517 }, { "epoch": 0.1941103916127043, "grad_norm": 3.681499719619751, "learning_rate": 9.3106465642051e-06, "loss": 1.0812, "step": 2518 }, { "epoch": 0.19418748072772124, "grad_norm": 3.9023890495300293, "learning_rate": 9.31001385620699e-06, "loss": 1.0172, "step": 2519 }, { "epoch": 0.1942645698427382, "grad_norm": 3.5285422801971436, "learning_rate": 9.309380879501839e-06, "loss": 1.0496, "step": 2520 }, { "epoch": 0.19434165895775515, "grad_norm": 3.923809051513672, "learning_rate": 9.30874763412911e-06, "loss": 1.0218, "step": 2521 }, { "epoch": 0.19441874807277212, "grad_norm": 3.9072353839874268, "learning_rate": 9.30811412012828e-06, "loss": 1.05, "step": 2522 }, { "epoch": 0.1944958371877891, "grad_norm": 3.4843831062316895, "learning_rate": 9.307480337538847e-06, "loss": 1.0038, "step": 2523 }, { "epoch": 0.19457292630280604, "grad_norm": 3.578922748565674, "learning_rate": 9.306846286400326e-06, "loss": 1.0023, "step": 2524 }, { "epoch": 0.194650015417823, "grad_norm": 3.625309705734253, "learning_rate": 9.306211966752243e-06, "loss": 1.0317, "step": 2525 }, { "epoch": 0.19472710453283995, "grad_norm": 3.810683012008667, "learning_rate": 9.305577378634148e-06, "loss": 1.0276, "step": 2526 }, { "epoch": 0.19480419364785692, "grad_norm": 4.362732887268066, "learning_rate": 9.3049425220856e-06, "loss": 1.0026, "step": 2527 }, { "epoch": 0.1948812827628739, "grad_norm": 3.8706610202789307, "learning_rate": 9.304307397146184e-06, "loss": 0.9928, "step": 2528 }, { "epoch": 0.19495837187789083, "grad_norm": 3.865088701248169, "learning_rate": 9.303672003855495e-06, "loss": 1.0645, "step": 2529 }, { "epoch": 0.1950354609929078, "grad_norm": 3.7114272117614746, "learning_rate": 9.303036342253143e-06, "loss": 1.003, "step": 2530 }, { "epoch": 0.19511255010792475, "grad_norm": 3.746708631515503, "learning_rate": 9.302400412378762e-06, "loss": 0.9651, "step": 2531 }, { "epoch": 0.19518963922294172, "grad_norm": 4.280477046966553, "learning_rate": 9.301764214272e-06, "loss": 0.9485, "step": 2532 }, { "epoch": 0.1952667283379587, "grad_norm": 3.7413394451141357, "learning_rate": 9.301127747972516e-06, "loss": 1.0762, "step": 2533 }, { "epoch": 0.19534381745297563, "grad_norm": 4.122095584869385, "learning_rate": 9.300491013519995e-06, "loss": 1.0784, "step": 2534 }, { "epoch": 0.1954209065679926, "grad_norm": 3.7376046180725098, "learning_rate": 9.29985401095413e-06, "loss": 1.0385, "step": 2535 }, { "epoch": 0.19549799568300955, "grad_norm": 4.303701877593994, "learning_rate": 9.299216740314639e-06, "loss": 1.0439, "step": 2536 }, { "epoch": 0.19557508479802652, "grad_norm": 4.109951019287109, "learning_rate": 9.298579201641248e-06, "loss": 1.0927, "step": 2537 }, { "epoch": 0.1956521739130435, "grad_norm": 4.093001365661621, "learning_rate": 9.29794139497371e-06, "loss": 1.0977, "step": 2538 }, { "epoch": 0.19572926302806043, "grad_norm": 
3.4555115699768066, "learning_rate": 9.297303320351784e-06, "loss": 0.9951, "step": 2539 }, { "epoch": 0.1958063521430774, "grad_norm": 3.6072354316711426, "learning_rate": 9.296664977815251e-06, "loss": 0.9743, "step": 2540 }, { "epoch": 0.19588344125809437, "grad_norm": 3.506932020187378, "learning_rate": 9.296026367403912e-06, "loss": 0.9706, "step": 2541 }, { "epoch": 0.1959605303731113, "grad_norm": 4.099440097808838, "learning_rate": 9.295387489157577e-06, "loss": 1.0866, "step": 2542 }, { "epoch": 0.19603761948812828, "grad_norm": 3.744800090789795, "learning_rate": 9.29474834311608e-06, "loss": 0.9554, "step": 2543 }, { "epoch": 0.19611470860314523, "grad_norm": 3.71545672416687, "learning_rate": 9.294108929319266e-06, "loss": 1.0344, "step": 2544 }, { "epoch": 0.1961917977181622, "grad_norm": 4.267796516418457, "learning_rate": 9.293469247806999e-06, "loss": 1.0125, "step": 2545 }, { "epoch": 0.19626888683317917, "grad_norm": 3.7793099880218506, "learning_rate": 9.292829298619161e-06, "loss": 1.0101, "step": 2546 }, { "epoch": 0.1963459759481961, "grad_norm": 3.366549015045166, "learning_rate": 9.292189081795651e-06, "loss": 0.9683, "step": 2547 }, { "epoch": 0.19642306506321308, "grad_norm": 3.89920973777771, "learning_rate": 9.291548597376382e-06, "loss": 1.0093, "step": 2548 }, { "epoch": 0.19650015417823002, "grad_norm": 3.8635799884796143, "learning_rate": 9.290907845401283e-06, "loss": 1.0173, "step": 2549 }, { "epoch": 0.196577243293247, "grad_norm": 3.8625593185424805, "learning_rate": 9.2902668259103e-06, "loss": 1.0594, "step": 2550 }, { "epoch": 0.19665433240826397, "grad_norm": 3.8342230319976807, "learning_rate": 9.289625538943405e-06, "loss": 0.9789, "step": 2551 }, { "epoch": 0.1967314215232809, "grad_norm": 3.684582233428955, "learning_rate": 9.288983984540573e-06, "loss": 0.8711, "step": 2552 }, { "epoch": 0.19680851063829788, "grad_norm": 4.02480936050415, "learning_rate": 9.288342162741803e-06, "loss": 0.9812, "step": 2553 }, { "epoch": 0.19688559975331482, "grad_norm": 3.966688871383667, "learning_rate": 9.287700073587106e-06, "loss": 0.9101, "step": 2554 }, { "epoch": 0.1969626888683318, "grad_norm": 3.733137607574463, "learning_rate": 9.28705771711652e-06, "loss": 1.0469, "step": 2555 }, { "epoch": 0.19703977798334876, "grad_norm": 4.436910629272461, "learning_rate": 9.286415093370087e-06, "loss": 1.0353, "step": 2556 }, { "epoch": 0.1971168670983657, "grad_norm": 4.32921028137207, "learning_rate": 9.285772202387872e-06, "loss": 1.2546, "step": 2557 }, { "epoch": 0.19719395621338268, "grad_norm": 4.157625675201416, "learning_rate": 9.285129044209958e-06, "loss": 1.144, "step": 2558 }, { "epoch": 0.19727104532839962, "grad_norm": 4.597263813018799, "learning_rate": 9.284485618876442e-06, "loss": 1.1378, "step": 2559 }, { "epoch": 0.1973481344434166, "grad_norm": 3.756883382797241, "learning_rate": 9.283841926427435e-06, "loss": 0.993, "step": 2560 }, { "epoch": 0.19742522355843356, "grad_norm": 3.802865982055664, "learning_rate": 9.283197966903073e-06, "loss": 1.0976, "step": 2561 }, { "epoch": 0.1975023126734505, "grad_norm": 3.97096848487854, "learning_rate": 9.282553740343501e-06, "loss": 1.0863, "step": 2562 }, { "epoch": 0.19757940178846747, "grad_norm": 3.7261781692504883, "learning_rate": 9.28190924678888e-06, "loss": 1.1109, "step": 2563 }, { "epoch": 0.19765649090348442, "grad_norm": 3.8818700313568115, "learning_rate": 9.281264486279398e-06, "loss": 1.1454, "step": 2564 }, { "epoch": 0.1977335800185014, "grad_norm": 3.9310739040374756, 
"learning_rate": 9.280619458855246e-06, "loss": 1.0731, "step": 2565 }, { "epoch": 0.19781066913351836, "grad_norm": 4.061331272125244, "learning_rate": 9.279974164556643e-06, "loss": 0.9986, "step": 2566 }, { "epoch": 0.1978877582485353, "grad_norm": 3.422016143798828, "learning_rate": 9.279328603423815e-06, "loss": 1.0443, "step": 2567 }, { "epoch": 0.19796484736355227, "grad_norm": 4.034252643585205, "learning_rate": 9.278682775497012e-06, "loss": 1.0879, "step": 2568 }, { "epoch": 0.1980419364785692, "grad_norm": 4.055642127990723, "learning_rate": 9.278036680816497e-06, "loss": 1.1014, "step": 2569 }, { "epoch": 0.19811902559358618, "grad_norm": 4.593109607696533, "learning_rate": 9.277390319422555e-06, "loss": 1.0524, "step": 2570 }, { "epoch": 0.19819611470860315, "grad_norm": 4.03125524520874, "learning_rate": 9.276743691355476e-06, "loss": 1.0198, "step": 2571 }, { "epoch": 0.1982732038236201, "grad_norm": 3.723781108856201, "learning_rate": 9.27609679665558e-06, "loss": 0.9915, "step": 2572 }, { "epoch": 0.19835029293863707, "grad_norm": 3.6599931716918945, "learning_rate": 9.275449635363195e-06, "loss": 1.0793, "step": 2573 }, { "epoch": 0.198427382053654, "grad_norm": 3.8307621479034424, "learning_rate": 9.274802207518668e-06, "loss": 1.0124, "step": 2574 }, { "epoch": 0.19850447116867098, "grad_norm": 3.984130620956421, "learning_rate": 9.274154513162364e-06, "loss": 0.9356, "step": 2575 }, { "epoch": 0.19858156028368795, "grad_norm": 3.825328826904297, "learning_rate": 9.27350655233466e-06, "loss": 1.0427, "step": 2576 }, { "epoch": 0.1986586493987049, "grad_norm": 4.283047199249268, "learning_rate": 9.272858325075957e-06, "loss": 1.0344, "step": 2577 }, { "epoch": 0.19873573851372187, "grad_norm": 3.8551671504974365, "learning_rate": 9.272209831426667e-06, "loss": 1.022, "step": 2578 }, { "epoch": 0.1988128276287388, "grad_norm": 3.6993603706359863, "learning_rate": 9.271561071427222e-06, "loss": 0.9627, "step": 2579 }, { "epoch": 0.19888991674375578, "grad_norm": 3.686549186706543, "learning_rate": 9.270912045118065e-06, "loss": 1.1547, "step": 2580 }, { "epoch": 0.19896700585877275, "grad_norm": 3.775413990020752, "learning_rate": 9.270262752539662e-06, "loss": 1.1395, "step": 2581 }, { "epoch": 0.1990440949737897, "grad_norm": 3.9551045894622803, "learning_rate": 9.269613193732492e-06, "loss": 1.05, "step": 2582 }, { "epoch": 0.19912118408880666, "grad_norm": 3.7017507553100586, "learning_rate": 9.268963368737053e-06, "loss": 1.0631, "step": 2583 }, { "epoch": 0.19919827320382363, "grad_norm": 4.082302093505859, "learning_rate": 9.268313277593859e-06, "loss": 1.177, "step": 2584 }, { "epoch": 0.19927536231884058, "grad_norm": 3.7110402584075928, "learning_rate": 9.267662920343436e-06, "loss": 1.0281, "step": 2585 }, { "epoch": 0.19935245143385755, "grad_norm": 4.061951160430908, "learning_rate": 9.267012297026334e-06, "loss": 1.0825, "step": 2586 }, { "epoch": 0.1994295405488745, "grad_norm": 3.88759708404541, "learning_rate": 9.266361407683115e-06, "loss": 1.0694, "step": 2587 }, { "epoch": 0.19950662966389146, "grad_norm": 4.016077518463135, "learning_rate": 9.265710252354358e-06, "loss": 1.0718, "step": 2588 }, { "epoch": 0.19958371877890843, "grad_norm": 3.7587389945983887, "learning_rate": 9.26505883108066e-06, "loss": 1.0342, "step": 2589 }, { "epoch": 0.19966080789392537, "grad_norm": 3.6690351963043213, "learning_rate": 9.264407143902632e-06, "loss": 1.057, "step": 2590 }, { "epoch": 0.19973789700894234, "grad_norm": 3.946889638900757, "learning_rate": 
9.263755190860904e-06, "loss": 1.0364, "step": 2591 }, { "epoch": 0.1998149861239593, "grad_norm": 3.688429832458496, "learning_rate": 9.263102971996123e-06, "loss": 1.1029, "step": 2592 }, { "epoch": 0.19989207523897626, "grad_norm": 3.6999905109405518, "learning_rate": 9.262450487348952e-06, "loss": 1.0277, "step": 2593 }, { "epoch": 0.19996916435399323, "grad_norm": 3.4279160499572754, "learning_rate": 9.261797736960067e-06, "loss": 0.9504, "step": 2594 }, { "epoch": 0.20004625346901017, "grad_norm": 3.8050403594970703, "learning_rate": 9.261144720870167e-06, "loss": 0.9475, "step": 2595 }, { "epoch": 0.20012334258402714, "grad_norm": 4.1190009117126465, "learning_rate": 9.260491439119963e-06, "loss": 1.1579, "step": 2596 }, { "epoch": 0.20020043169904408, "grad_norm": 3.654827117919922, "learning_rate": 9.259837891750183e-06, "loss": 0.9909, "step": 2597 }, { "epoch": 0.20027752081406106, "grad_norm": 3.997933864593506, "learning_rate": 9.259184078801571e-06, "loss": 1.1038, "step": 2598 }, { "epoch": 0.20035460992907803, "grad_norm": 3.865818738937378, "learning_rate": 9.258530000314893e-06, "loss": 0.9885, "step": 2599 }, { "epoch": 0.20043169904409497, "grad_norm": 3.9201407432556152, "learning_rate": 9.257875656330923e-06, "loss": 1.1399, "step": 2600 }, { "epoch": 0.20050878815911194, "grad_norm": 3.548663377761841, "learning_rate": 9.257221046890459e-06, "loss": 1.0436, "step": 2601 }, { "epoch": 0.20058587727412888, "grad_norm": 4.565245628356934, "learning_rate": 9.256566172034312e-06, "loss": 1.0835, "step": 2602 }, { "epoch": 0.20066296638914585, "grad_norm": 3.7430996894836426, "learning_rate": 9.255911031803308e-06, "loss": 1.1998, "step": 2603 }, { "epoch": 0.20074005550416282, "grad_norm": 3.844200611114502, "learning_rate": 9.255255626238295e-06, "loss": 1.0305, "step": 2604 }, { "epoch": 0.20081714461917977, "grad_norm": 3.844482898712158, "learning_rate": 9.25459995538013e-06, "loss": 1.1108, "step": 2605 }, { "epoch": 0.20089423373419674, "grad_norm": 4.113783836364746, "learning_rate": 9.253944019269695e-06, "loss": 0.9527, "step": 2606 }, { "epoch": 0.20097132284921368, "grad_norm": 3.4817748069763184, "learning_rate": 9.25328781794788e-06, "loss": 0.9548, "step": 2607 }, { "epoch": 0.20104841196423065, "grad_norm": 3.778887987136841, "learning_rate": 9.2526313514556e-06, "loss": 1.0612, "step": 2608 }, { "epoch": 0.20112550107924762, "grad_norm": 4.771206855773926, "learning_rate": 9.251974619833779e-06, "loss": 1.1353, "step": 2609 }, { "epoch": 0.20120259019426456, "grad_norm": 3.8691132068634033, "learning_rate": 9.251317623123363e-06, "loss": 1.0599, "step": 2610 }, { "epoch": 0.20127967930928153, "grad_norm": 4.0417070388793945, "learning_rate": 9.25066036136531e-06, "loss": 1.0021, "step": 2611 }, { "epoch": 0.20135676842429848, "grad_norm": 4.274367809295654, "learning_rate": 9.2500028346006e-06, "loss": 1.0906, "step": 2612 }, { "epoch": 0.20143385753931545, "grad_norm": 3.9163665771484375, "learning_rate": 9.249345042870222e-06, "loss": 0.9961, "step": 2613 }, { "epoch": 0.20151094665433242, "grad_norm": 4.183355808258057, "learning_rate": 9.248686986215189e-06, "loss": 1.1059, "step": 2614 }, { "epoch": 0.20158803576934936, "grad_norm": 3.863225221633911, "learning_rate": 9.248028664676529e-06, "loss": 1.0913, "step": 2615 }, { "epoch": 0.20166512488436633, "grad_norm": 4.189373016357422, "learning_rate": 9.24737007829528e-06, "loss": 1.094, "step": 2616 }, { "epoch": 0.20174221399938327, "grad_norm": 3.7555601596832275, "learning_rate": 
9.246711227112509e-06, "loss": 1.0754, "step": 2617 }, { "epoch": 0.20181930311440024, "grad_norm": 3.700612783432007, "learning_rate": 9.246052111169283e-06, "loss": 0.989, "step": 2618 }, { "epoch": 0.20189639222941722, "grad_norm": 4.023221015930176, "learning_rate": 9.2453927305067e-06, "loss": 1.0806, "step": 2619 }, { "epoch": 0.20197348134443416, "grad_norm": 3.949838161468506, "learning_rate": 9.244733085165868e-06, "loss": 1.0044, "step": 2620 }, { "epoch": 0.20205057045945113, "grad_norm": 4.30818510055542, "learning_rate": 9.244073175187912e-06, "loss": 1.0475, "step": 2621 }, { "epoch": 0.20212765957446807, "grad_norm": 4.544628620147705, "learning_rate": 9.243413000613974e-06, "loss": 1.0492, "step": 2622 }, { "epoch": 0.20220474868948504, "grad_norm": 3.771693706512451, "learning_rate": 9.242752561485213e-06, "loss": 1.0835, "step": 2623 }, { "epoch": 0.202281837804502, "grad_norm": 3.973714590072632, "learning_rate": 9.242091857842803e-06, "loss": 1.0409, "step": 2624 }, { "epoch": 0.20235892691951896, "grad_norm": 4.235926628112793, "learning_rate": 9.241430889727936e-06, "loss": 1.1188, "step": 2625 }, { "epoch": 0.20243601603453593, "grad_norm": 4.404877185821533, "learning_rate": 9.24076965718182e-06, "loss": 0.9393, "step": 2626 }, { "epoch": 0.2025131051495529, "grad_norm": 3.5260396003723145, "learning_rate": 9.24010816024568e-06, "loss": 0.9963, "step": 2627 }, { "epoch": 0.20259019426456984, "grad_norm": 3.725531816482544, "learning_rate": 9.239446398960756e-06, "loss": 0.9743, "step": 2628 }, { "epoch": 0.2026672833795868, "grad_norm": 3.668250799179077, "learning_rate": 9.238784373368306e-06, "loss": 1.0292, "step": 2629 }, { "epoch": 0.20274437249460375, "grad_norm": 4.116795539855957, "learning_rate": 9.238122083509602e-06, "loss": 1.1924, "step": 2630 }, { "epoch": 0.20282146160962072, "grad_norm": 3.9489383697509766, "learning_rate": 9.237459529425938e-06, "loss": 1.0364, "step": 2631 }, { "epoch": 0.2028985507246377, "grad_norm": 3.3951849937438965, "learning_rate": 9.236796711158617e-06, "loss": 0.9725, "step": 2632 }, { "epoch": 0.20297563983965464, "grad_norm": 3.6997361183166504, "learning_rate": 9.236133628748965e-06, "loss": 1.0287, "step": 2633 }, { "epoch": 0.2030527289546716, "grad_norm": 3.9400417804718018, "learning_rate": 9.235470282238322e-06, "loss": 1.1658, "step": 2634 }, { "epoch": 0.20312981806968855, "grad_norm": 4.31356954574585, "learning_rate": 9.23480667166804e-06, "loss": 1.0993, "step": 2635 }, { "epoch": 0.20320690718470552, "grad_norm": 3.9322855472564697, "learning_rate": 9.234142797079496e-06, "loss": 0.9019, "step": 2636 }, { "epoch": 0.2032839962997225, "grad_norm": 4.056495666503906, "learning_rate": 9.233478658514078e-06, "loss": 1.0006, "step": 2637 }, { "epoch": 0.20336108541473943, "grad_norm": 3.7447257041931152, "learning_rate": 9.232814256013192e-06, "loss": 1.009, "step": 2638 }, { "epoch": 0.2034381745297564, "grad_norm": 3.8734138011932373, "learning_rate": 9.232149589618257e-06, "loss": 0.9408, "step": 2639 }, { "epoch": 0.20351526364477335, "grad_norm": 3.8473434448242188, "learning_rate": 9.231484659370717e-06, "loss": 1.0765, "step": 2640 }, { "epoch": 0.20359235275979032, "grad_norm": 3.7293701171875, "learning_rate": 9.230819465312022e-06, "loss": 1.1238, "step": 2641 }, { "epoch": 0.2036694418748073, "grad_norm": 3.6699728965759277, "learning_rate": 9.230154007483646e-06, "loss": 0.968, "step": 2642 }, { "epoch": 0.20374653098982423, "grad_norm": 3.8388984203338623, "learning_rate": 9.229488285927077e-06, 
"loss": 0.9983, "step": 2643 }, { "epoch": 0.2038236201048412, "grad_norm": 4.062560558319092, "learning_rate": 9.228822300683817e-06, "loss": 1.0297, "step": 2644 }, { "epoch": 0.20390070921985815, "grad_norm": 3.8813700675964355, "learning_rate": 9.228156051795388e-06, "loss": 1.1485, "step": 2645 }, { "epoch": 0.20397779833487512, "grad_norm": 3.9767556190490723, "learning_rate": 9.227489539303329e-06, "loss": 0.9742, "step": 2646 }, { "epoch": 0.2040548874498921, "grad_norm": 4.134043216705322, "learning_rate": 9.22682276324919e-06, "loss": 1.0414, "step": 2647 }, { "epoch": 0.20413197656490903, "grad_norm": 3.7752439975738525, "learning_rate": 9.226155723674543e-06, "loss": 0.9868, "step": 2648 }, { "epoch": 0.204209065679926, "grad_norm": 3.694692373275757, "learning_rate": 9.225488420620977e-06, "loss": 1.116, "step": 2649 }, { "epoch": 0.20428615479494294, "grad_norm": 3.801795721054077, "learning_rate": 9.22482085413009e-06, "loss": 1.0275, "step": 2650 }, { "epoch": 0.2043632439099599, "grad_norm": 3.7427256107330322, "learning_rate": 9.224153024243505e-06, "loss": 1.1208, "step": 2651 }, { "epoch": 0.20444033302497688, "grad_norm": 4.123345375061035, "learning_rate": 9.223484931002856e-06, "loss": 1.0278, "step": 2652 }, { "epoch": 0.20451742213999383, "grad_norm": 3.854743003845215, "learning_rate": 9.222816574449796e-06, "loss": 1.1139, "step": 2653 }, { "epoch": 0.2045945112550108, "grad_norm": 3.620626449584961, "learning_rate": 9.222147954625992e-06, "loss": 1.03, "step": 2654 }, { "epoch": 0.20467160037002774, "grad_norm": 3.9541471004486084, "learning_rate": 9.22147907157313e-06, "loss": 1.0944, "step": 2655 }, { "epoch": 0.2047486894850447, "grad_norm": 4.048750400543213, "learning_rate": 9.220809925332911e-06, "loss": 1.08, "step": 2656 }, { "epoch": 0.20482577860006168, "grad_norm": 3.7369632720947266, "learning_rate": 9.220140515947056e-06, "loss": 0.989, "step": 2657 }, { "epoch": 0.20490286771507862, "grad_norm": 4.2197489738464355, "learning_rate": 9.219470843457294e-06, "loss": 1.1959, "step": 2658 }, { "epoch": 0.2049799568300956, "grad_norm": 3.825822591781616, "learning_rate": 9.218800907905379e-06, "loss": 1.026, "step": 2659 }, { "epoch": 0.20505704594511254, "grad_norm": 3.7242703437805176, "learning_rate": 9.218130709333077e-06, "loss": 1.0186, "step": 2660 }, { "epoch": 0.2051341350601295, "grad_norm": 3.669600248336792, "learning_rate": 9.217460247782174e-06, "loss": 1.0269, "step": 2661 }, { "epoch": 0.20521122417514648, "grad_norm": 4.220530986785889, "learning_rate": 9.216789523294462e-06, "loss": 1.1483, "step": 2662 }, { "epoch": 0.20528831329016342, "grad_norm": 3.4208221435546875, "learning_rate": 9.216118535911766e-06, "loss": 0.8748, "step": 2663 }, { "epoch": 0.2053654024051804, "grad_norm": 4.243244647979736, "learning_rate": 9.215447285675916e-06, "loss": 1.18, "step": 2664 }, { "epoch": 0.20544249152019733, "grad_norm": 3.644279718399048, "learning_rate": 9.214775772628759e-06, "loss": 1.042, "step": 2665 }, { "epoch": 0.2055195806352143, "grad_norm": 4.032573223114014, "learning_rate": 9.21410399681216e-06, "loss": 1.1426, "step": 2666 }, { "epoch": 0.20559666975023128, "grad_norm": 3.6652822494506836, "learning_rate": 9.213431958268004e-06, "loss": 0.9583, "step": 2667 }, { "epoch": 0.20567375886524822, "grad_norm": 3.9682374000549316, "learning_rate": 9.212759657038186e-06, "loss": 0.9783, "step": 2668 }, { "epoch": 0.2057508479802652, "grad_norm": 3.6445670127868652, "learning_rate": 9.212087093164624e-06, "loss": 0.9417, "step": 2669 }, 
{ "epoch": 0.20582793709528216, "grad_norm": 3.758375406265259, "learning_rate": 9.211414266689245e-06, "loss": 1.0221, "step": 2670 }, { "epoch": 0.2059050262102991, "grad_norm": 4.022796154022217, "learning_rate": 9.210741177653997e-06, "loss": 1.107, "step": 2671 }, { "epoch": 0.20598211532531607, "grad_norm": 3.7436466217041016, "learning_rate": 9.210067826100845e-06, "loss": 1.0894, "step": 2672 }, { "epoch": 0.20605920444033302, "grad_norm": 4.940155982971191, "learning_rate": 9.20939421207177e-06, "loss": 1.0682, "step": 2673 }, { "epoch": 0.20613629355535, "grad_norm": 3.8470208644866943, "learning_rate": 9.208720335608767e-06, "loss": 1.0735, "step": 2674 }, { "epoch": 0.20621338267036696, "grad_norm": 3.703685998916626, "learning_rate": 9.208046196753848e-06, "loss": 1.0533, "step": 2675 }, { "epoch": 0.2062904717853839, "grad_norm": 4.3979291915893555, "learning_rate": 9.207371795549043e-06, "loss": 1.2048, "step": 2676 }, { "epoch": 0.20636756090040087, "grad_norm": 3.9959652423858643, "learning_rate": 9.206697132036395e-06, "loss": 1.01, "step": 2677 }, { "epoch": 0.2064446500154178, "grad_norm": 3.741591453552246, "learning_rate": 9.206022206257969e-06, "loss": 0.971, "step": 2678 }, { "epoch": 0.20652173913043478, "grad_norm": 3.790801763534546, "learning_rate": 9.205347018255844e-06, "loss": 1.2011, "step": 2679 }, { "epoch": 0.20659882824545175, "grad_norm": 4.108438491821289, "learning_rate": 9.20467156807211e-06, "loss": 1.0247, "step": 2680 }, { "epoch": 0.2066759173604687, "grad_norm": 3.6132218837738037, "learning_rate": 9.203995855748882e-06, "loss": 1.0329, "step": 2681 }, { "epoch": 0.20675300647548567, "grad_norm": 3.6847808361053467, "learning_rate": 9.203319881328284e-06, "loss": 1.0839, "step": 2682 }, { "epoch": 0.2068300955905026, "grad_norm": 3.7520806789398193, "learning_rate": 9.202643644852461e-06, "loss": 1.0236, "step": 2683 }, { "epoch": 0.20690718470551958, "grad_norm": 3.8004398345947266, "learning_rate": 9.201967146363572e-06, "loss": 1.0043, "step": 2684 }, { "epoch": 0.20698427382053655, "grad_norm": 3.612555503845215, "learning_rate": 9.201290385903796e-06, "loss": 1.0641, "step": 2685 }, { "epoch": 0.2070613629355535, "grad_norm": 3.6692962646484375, "learning_rate": 9.200613363515325e-06, "loss": 0.9243, "step": 2686 }, { "epoch": 0.20713845205057047, "grad_norm": 3.6443002223968506, "learning_rate": 9.199936079240364e-06, "loss": 1.0226, "step": 2687 }, { "epoch": 0.2072155411655874, "grad_norm": 4.001389503479004, "learning_rate": 9.19925853312114e-06, "loss": 1.092, "step": 2688 }, { "epoch": 0.20729263028060438, "grad_norm": 4.218007564544678, "learning_rate": 9.198580725199894e-06, "loss": 1.0681, "step": 2689 }, { "epoch": 0.20736971939562135, "grad_norm": 4.421291351318359, "learning_rate": 9.197902655518887e-06, "loss": 1.092, "step": 2690 }, { "epoch": 0.2074468085106383, "grad_norm": 3.6270670890808105, "learning_rate": 9.197224324120391e-06, "loss": 0.9676, "step": 2691 }, { "epoch": 0.20752389762565526, "grad_norm": 3.7732226848602295, "learning_rate": 9.196545731046695e-06, "loss": 0.9676, "step": 2692 }, { "epoch": 0.2076009867406722, "grad_norm": 4.031553268432617, "learning_rate": 9.195866876340107e-06, "loss": 1.0286, "step": 2693 }, { "epoch": 0.20767807585568918, "grad_norm": 3.7905490398406982, "learning_rate": 9.195187760042952e-06, "loss": 1.0421, "step": 2694 }, { "epoch": 0.20775516497070615, "grad_norm": 3.822108745574951, "learning_rate": 9.194508382197566e-06, "loss": 1.0981, "step": 2695 }, { "epoch": 
0.2078322540857231, "grad_norm": 3.774367332458496, "learning_rate": 9.193828742846307e-06, "loss": 1.0733, "step": 2696 }, { "epoch": 0.20790934320074006, "grad_norm": 4.311622619628906, "learning_rate": 9.193148842031548e-06, "loss": 0.9735, "step": 2697 }, { "epoch": 0.207986432315757, "grad_norm": 3.389493465423584, "learning_rate": 9.192468679795671e-06, "loss": 0.9759, "step": 2698 }, { "epoch": 0.20806352143077397, "grad_norm": 4.131453990936279, "learning_rate": 9.19178825618109e-06, "loss": 1.011, "step": 2699 }, { "epoch": 0.20814061054579094, "grad_norm": 3.75632643699646, "learning_rate": 9.191107571230217e-06, "loss": 0.9215, "step": 2700 }, { "epoch": 0.2082176996608079, "grad_norm": 3.789395332336426, "learning_rate": 9.190426624985497e-06, "loss": 1.1335, "step": 2701 }, { "epoch": 0.20829478877582486, "grad_norm": 4.15241003036499, "learning_rate": 9.189745417489378e-06, "loss": 1.1109, "step": 2702 }, { "epoch": 0.2083718778908418, "grad_norm": 3.8731484413146973, "learning_rate": 9.189063948784332e-06, "loss": 0.9326, "step": 2703 }, { "epoch": 0.20844896700585877, "grad_norm": 3.67352294921875, "learning_rate": 9.188382218912844e-06, "loss": 1.058, "step": 2704 }, { "epoch": 0.20852605612087574, "grad_norm": 3.72332501411438, "learning_rate": 9.187700227917415e-06, "loss": 1.0898, "step": 2705 }, { "epoch": 0.20860314523589268, "grad_norm": 3.6140379905700684, "learning_rate": 9.187017975840568e-06, "loss": 1.0351, "step": 2706 }, { "epoch": 0.20868023435090965, "grad_norm": 3.6925249099731445, "learning_rate": 9.186335462724834e-06, "loss": 1.0802, "step": 2707 }, { "epoch": 0.2087573234659266, "grad_norm": 3.8140275478363037, "learning_rate": 9.185652688612766e-06, "loss": 1.0149, "step": 2708 }, { "epoch": 0.20883441258094357, "grad_norm": 3.920686960220337, "learning_rate": 9.184969653546932e-06, "loss": 1.1498, "step": 2709 }, { "epoch": 0.20891150169596054, "grad_norm": 3.6589865684509277, "learning_rate": 9.184286357569913e-06, "loss": 0.9951, "step": 2710 }, { "epoch": 0.20898859081097748, "grad_norm": 3.9114480018615723, "learning_rate": 9.183602800724311e-06, "loss": 1.1772, "step": 2711 }, { "epoch": 0.20906567992599445, "grad_norm": 3.792837381362915, "learning_rate": 9.182918983052743e-06, "loss": 0.9794, "step": 2712 }, { "epoch": 0.20914276904101142, "grad_norm": 3.940286636352539, "learning_rate": 9.182234904597838e-06, "loss": 1.0731, "step": 2713 }, { "epoch": 0.20921985815602837, "grad_norm": 3.8480448722839355, "learning_rate": 9.181550565402248e-06, "loss": 1.1191, "step": 2714 }, { "epoch": 0.20929694727104534, "grad_norm": 3.821777820587158, "learning_rate": 9.180865965508638e-06, "loss": 0.9818, "step": 2715 }, { "epoch": 0.20937403638606228, "grad_norm": 4.0068511962890625, "learning_rate": 9.180181104959686e-06, "loss": 1.0953, "step": 2716 }, { "epoch": 0.20945112550107925, "grad_norm": 3.940507650375366, "learning_rate": 9.179495983798094e-06, "loss": 1.07, "step": 2717 }, { "epoch": 0.20952821461609622, "grad_norm": 4.213906288146973, "learning_rate": 9.178810602066575e-06, "loss": 1.0311, "step": 2718 }, { "epoch": 0.20960530373111316, "grad_norm": 3.6800248622894287, "learning_rate": 9.178124959807854e-06, "loss": 0.9295, "step": 2719 }, { "epoch": 0.20968239284613013, "grad_norm": 3.5638949871063232, "learning_rate": 9.177439057064684e-06, "loss": 0.9917, "step": 2720 }, { "epoch": 0.20975948196114708, "grad_norm": 4.166075229644775, "learning_rate": 9.17675289387982e-06, "loss": 1.0279, "step": 2721 }, { "epoch": 0.20983657107616405, 
"grad_norm": 3.7445201873779297, "learning_rate": 9.17606647029605e-06, "loss": 1.0408, "step": 2722 }, { "epoch": 0.20991366019118102, "grad_norm": 3.632784128189087, "learning_rate": 9.175379786356162e-06, "loss": 1.0679, "step": 2723 }, { "epoch": 0.20999074930619796, "grad_norm": 3.969790458679199, "learning_rate": 9.174692842102968e-06, "loss": 1.1247, "step": 2724 }, { "epoch": 0.21006783842121493, "grad_norm": 3.7024993896484375, "learning_rate": 9.174005637579297e-06, "loss": 1.0884, "step": 2725 }, { "epoch": 0.21014492753623187, "grad_norm": 4.002373695373535, "learning_rate": 9.173318172827994e-06, "loss": 1.1234, "step": 2726 }, { "epoch": 0.21022201665124884, "grad_norm": 3.7659733295440674, "learning_rate": 9.172630447891915e-06, "loss": 1.0724, "step": 2727 }, { "epoch": 0.21029910576626581, "grad_norm": 4.004013538360596, "learning_rate": 9.171942462813938e-06, "loss": 1.0574, "step": 2728 }, { "epoch": 0.21037619488128276, "grad_norm": 3.9406075477600098, "learning_rate": 9.171254217636958e-06, "loss": 1.1462, "step": 2729 }, { "epoch": 0.21045328399629973, "grad_norm": 4.120555877685547, "learning_rate": 9.17056571240388e-06, "loss": 1.0335, "step": 2730 }, { "epoch": 0.21053037311131667, "grad_norm": 3.955146312713623, "learning_rate": 9.169876947157628e-06, "loss": 1.0698, "step": 2731 }, { "epoch": 0.21060746222633364, "grad_norm": 4.344998836517334, "learning_rate": 9.169187921941148e-06, "loss": 1.1751, "step": 2732 }, { "epoch": 0.2106845513413506, "grad_norm": 4.642431259155273, "learning_rate": 9.16849863679739e-06, "loss": 1.1332, "step": 2733 }, { "epoch": 0.21076164045636755, "grad_norm": 3.788193464279175, "learning_rate": 9.167809091769334e-06, "loss": 1.0431, "step": 2734 }, { "epoch": 0.21083872957138453, "grad_norm": 3.8150315284729004, "learning_rate": 9.167119286899965e-06, "loss": 1.1172, "step": 2735 }, { "epoch": 0.21091581868640147, "grad_norm": 4.12971305847168, "learning_rate": 9.16642922223229e-06, "loss": 1.0524, "step": 2736 }, { "epoch": 0.21099290780141844, "grad_norm": 3.7752163410186768, "learning_rate": 9.165738897809335e-06, "loss": 1.0584, "step": 2737 }, { "epoch": 0.2110699969164354, "grad_norm": 4.05067253112793, "learning_rate": 9.165048313674131e-06, "loss": 1.1037, "step": 2738 }, { "epoch": 0.21114708603145235, "grad_norm": 4.372398376464844, "learning_rate": 9.164357469869737e-06, "loss": 1.0896, "step": 2739 }, { "epoch": 0.21122417514646932, "grad_norm": 3.3885154724121094, "learning_rate": 9.163666366439223e-06, "loss": 0.8929, "step": 2740 }, { "epoch": 0.21130126426148627, "grad_norm": 3.9432756900787354, "learning_rate": 9.162975003425676e-06, "loss": 1.0253, "step": 2741 }, { "epoch": 0.21137835337650324, "grad_norm": 3.852996587753296, "learning_rate": 9.162283380872197e-06, "loss": 0.9896, "step": 2742 }, { "epoch": 0.2114554424915202, "grad_norm": 3.6689140796661377, "learning_rate": 9.161591498821907e-06, "loss": 1.1041, "step": 2743 }, { "epoch": 0.21153253160653715, "grad_norm": 4.0420403480529785, "learning_rate": 9.16089935731794e-06, "loss": 1.0409, "step": 2744 }, { "epoch": 0.21160962072155412, "grad_norm": 3.9778621196746826, "learning_rate": 9.160206956403448e-06, "loss": 1.0178, "step": 2745 }, { "epoch": 0.21168670983657106, "grad_norm": 4.711827278137207, "learning_rate": 9.1595142961216e-06, "loss": 0.9995, "step": 2746 }, { "epoch": 0.21176379895158803, "grad_norm": 3.6355984210968018, "learning_rate": 9.158821376515574e-06, "loss": 1.0407, "step": 2747 }, { "epoch": 0.211840888066605, "grad_norm": 
4.1716203689575195, "learning_rate": 9.158128197628578e-06, "loss": 0.9243, "step": 2748 }, { "epoch": 0.21191797718162195, "grad_norm": 3.8837966918945312, "learning_rate": 9.157434759503823e-06, "loss": 1.0692, "step": 2749 }, { "epoch": 0.21199506629663892, "grad_norm": 4.1938700675964355, "learning_rate": 9.156741062184543e-06, "loss": 1.0286, "step": 2750 }, { "epoch": 0.21207215541165586, "grad_norm": 3.5614426136016846, "learning_rate": 9.156047105713986e-06, "loss": 1.0052, "step": 2751 }, { "epoch": 0.21214924452667283, "grad_norm": 3.653048276901245, "learning_rate": 9.155352890135417e-06, "loss": 1.0898, "step": 2752 }, { "epoch": 0.2122263336416898, "grad_norm": 3.959932327270508, "learning_rate": 9.154658415492116e-06, "loss": 1.1145, "step": 2753 }, { "epoch": 0.21230342275670674, "grad_norm": 3.750103235244751, "learning_rate": 9.153963681827382e-06, "loss": 1.018, "step": 2754 }, { "epoch": 0.21238051187172372, "grad_norm": 4.643415451049805, "learning_rate": 9.153268689184524e-06, "loss": 1.0096, "step": 2755 }, { "epoch": 0.21245760098674069, "grad_norm": 4.112432956695557, "learning_rate": 9.152573437606874e-06, "loss": 1.1197, "step": 2756 }, { "epoch": 0.21253469010175763, "grad_norm": 3.4916539192199707, "learning_rate": 9.151877927137778e-06, "loss": 1.0737, "step": 2757 }, { "epoch": 0.2126117792167746, "grad_norm": 3.6449830532073975, "learning_rate": 9.151182157820595e-06, "loss": 1.031, "step": 2758 }, { "epoch": 0.21268886833179154, "grad_norm": 3.4803693294525146, "learning_rate": 9.150486129698706e-06, "loss": 1.0208, "step": 2759 }, { "epoch": 0.2127659574468085, "grad_norm": 4.427255153656006, "learning_rate": 9.1497898428155e-06, "loss": 1.1607, "step": 2760 }, { "epoch": 0.21284304656182548, "grad_norm": 3.903338670730591, "learning_rate": 9.149093297214392e-06, "loss": 1.0678, "step": 2761 }, { "epoch": 0.21292013567684243, "grad_norm": 3.8415660858154297, "learning_rate": 9.148396492938806e-06, "loss": 1.0445, "step": 2762 }, { "epoch": 0.2129972247918594, "grad_norm": 4.833527565002441, "learning_rate": 9.14769943003218e-06, "loss": 1.2442, "step": 2763 }, { "epoch": 0.21307431390687634, "grad_norm": 3.9709668159484863, "learning_rate": 9.14700210853798e-06, "loss": 1.0287, "step": 2764 }, { "epoch": 0.2131514030218933, "grad_norm": 4.086493015289307, "learning_rate": 9.146304528499674e-06, "loss": 0.9555, "step": 2765 }, { "epoch": 0.21322849213691028, "grad_norm": 3.9784085750579834, "learning_rate": 9.145606689960756e-06, "loss": 1.0339, "step": 2766 }, { "epoch": 0.21330558125192722, "grad_norm": 4.03312873840332, "learning_rate": 9.144908592964732e-06, "loss": 1.0068, "step": 2767 }, { "epoch": 0.2133826703669442, "grad_norm": 4.233692646026611, "learning_rate": 9.144210237555124e-06, "loss": 1.1303, "step": 2768 }, { "epoch": 0.21345975948196114, "grad_norm": 4.059010982513428, "learning_rate": 9.143511623775469e-06, "loss": 0.9926, "step": 2769 }, { "epoch": 0.2135368485969781, "grad_norm": 3.532331705093384, "learning_rate": 9.142812751669327e-06, "loss": 0.9392, "step": 2770 }, { "epoch": 0.21361393771199508, "grad_norm": 3.3265960216522217, "learning_rate": 9.142113621280265e-06, "loss": 0.9405, "step": 2771 }, { "epoch": 0.21369102682701202, "grad_norm": 3.408034324645996, "learning_rate": 9.14141423265187e-06, "loss": 0.9525, "step": 2772 }, { "epoch": 0.213768115942029, "grad_norm": 3.924441337585449, "learning_rate": 9.140714585827748e-06, "loss": 1.0772, "step": 2773 }, { "epoch": 0.21384520505704593, "grad_norm": 3.8719756603240967, 
"learning_rate": 9.140014680851516e-06, "loss": 0.9904, "step": 2774 }, { "epoch": 0.2139222941720629, "grad_norm": 3.7581069469451904, "learning_rate": 9.139314517766811e-06, "loss": 1.0082, "step": 2775 }, { "epoch": 0.21399938328707988, "grad_norm": 3.611703872680664, "learning_rate": 9.138614096617285e-06, "loss": 0.9756, "step": 2776 }, { "epoch": 0.21407647240209682, "grad_norm": 3.581341505050659, "learning_rate": 9.137913417446603e-06, "loss": 1.0092, "step": 2777 }, { "epoch": 0.2141535615171138, "grad_norm": 3.8926100730895996, "learning_rate": 9.137212480298451e-06, "loss": 1.0584, "step": 2778 }, { "epoch": 0.21423065063213073, "grad_norm": 3.6705634593963623, "learning_rate": 9.136511285216527e-06, "loss": 1.0492, "step": 2779 }, { "epoch": 0.2143077397471477, "grad_norm": 3.6244938373565674, "learning_rate": 9.13580983224455e-06, "loss": 0.9325, "step": 2780 }, { "epoch": 0.21438482886216467, "grad_norm": 3.5282609462738037, "learning_rate": 9.135108121426247e-06, "loss": 0.9749, "step": 2781 }, { "epoch": 0.21446191797718162, "grad_norm": 4.567509651184082, "learning_rate": 9.13440615280537e-06, "loss": 1.0813, "step": 2782 }, { "epoch": 0.21453900709219859, "grad_norm": 3.8714842796325684, "learning_rate": 9.133703926425683e-06, "loss": 1.0343, "step": 2783 }, { "epoch": 0.21461609620721553, "grad_norm": 3.845766305923462, "learning_rate": 9.133001442330964e-06, "loss": 1.0322, "step": 2784 }, { "epoch": 0.2146931853222325, "grad_norm": 3.7517282962799072, "learning_rate": 9.132298700565011e-06, "loss": 1.0577, "step": 2785 }, { "epoch": 0.21477027443724947, "grad_norm": 3.8736910820007324, "learning_rate": 9.131595701171636e-06, "loss": 1.0193, "step": 2786 }, { "epoch": 0.2148473635522664, "grad_norm": 3.8819291591644287, "learning_rate": 9.130892444194666e-06, "loss": 1.0163, "step": 2787 }, { "epoch": 0.21492445266728338, "grad_norm": 3.802039623260498, "learning_rate": 9.130188929677948e-06, "loss": 1.0257, "step": 2788 }, { "epoch": 0.21500154178230033, "grad_norm": 4.080732822418213, "learning_rate": 9.129485157665339e-06, "loss": 0.9556, "step": 2789 }, { "epoch": 0.2150786308973173, "grad_norm": 3.748866081237793, "learning_rate": 9.12878112820072e-06, "loss": 0.9364, "step": 2790 }, { "epoch": 0.21515572001233427, "grad_norm": 3.8058340549468994, "learning_rate": 9.12807684132798e-06, "loss": 0.9971, "step": 2791 }, { "epoch": 0.2152328091273512, "grad_norm": 4.344945430755615, "learning_rate": 9.127372297091028e-06, "loss": 1.1081, "step": 2792 }, { "epoch": 0.21530989824236818, "grad_norm": 4.168917655944824, "learning_rate": 9.126667495533791e-06, "loss": 1.0641, "step": 2793 }, { "epoch": 0.21538698735738512, "grad_norm": 3.46258282661438, "learning_rate": 9.125962436700207e-06, "loss": 0.9893, "step": 2794 }, { "epoch": 0.2154640764724021, "grad_norm": 3.5184786319732666, "learning_rate": 9.125257120634234e-06, "loss": 1.0043, "step": 2795 }, { "epoch": 0.21554116558741906, "grad_norm": 3.7548696994781494, "learning_rate": 9.124551547379846e-06, "loss": 0.9768, "step": 2796 }, { "epoch": 0.215618254702436, "grad_norm": 3.801421642303467, "learning_rate": 9.12384571698103e-06, "loss": 0.9912, "step": 2797 }, { "epoch": 0.21569534381745298, "grad_norm": 4.047616004943848, "learning_rate": 9.123139629481792e-06, "loss": 1.0559, "step": 2798 }, { "epoch": 0.21577243293246995, "grad_norm": 3.8471899032592773, "learning_rate": 9.12243328492615e-06, "loss": 1.099, "step": 2799 }, { "epoch": 0.2158495220474869, "grad_norm": 3.7033841609954834, "learning_rate": 
9.121726683358146e-06, "loss": 1.0111, "step": 2800 }, { "epoch": 0.21592661116250386, "grad_norm": 4.303826808929443, "learning_rate": 9.12101982482183e-06, "loss": 1.025, "step": 2801 }, { "epoch": 0.2160037002775208, "grad_norm": 3.404069662094116, "learning_rate": 9.120312709361271e-06, "loss": 0.9853, "step": 2802 }, { "epoch": 0.21608078939253778, "grad_norm": 3.566990613937378, "learning_rate": 9.119605337020554e-06, "loss": 1.0534, "step": 2803 }, { "epoch": 0.21615787850755475, "grad_norm": 4.058454990386963, "learning_rate": 9.118897707843779e-06, "loss": 0.9761, "step": 2804 }, { "epoch": 0.2162349676225717, "grad_norm": 3.7274980545043945, "learning_rate": 9.118189821875066e-06, "loss": 0.9726, "step": 2805 }, { "epoch": 0.21631205673758866, "grad_norm": 3.776718854904175, "learning_rate": 9.117481679158546e-06, "loss": 1.0489, "step": 2806 }, { "epoch": 0.2163891458526056, "grad_norm": 3.99607253074646, "learning_rate": 9.116773279738367e-06, "loss": 0.9758, "step": 2807 }, { "epoch": 0.21646623496762257, "grad_norm": 3.691988945007324, "learning_rate": 9.116064623658696e-06, "loss": 1.0635, "step": 2808 }, { "epoch": 0.21654332408263954, "grad_norm": 4.369837284088135, "learning_rate": 9.115355710963714e-06, "loss": 1.1056, "step": 2809 }, { "epoch": 0.2166204131976565, "grad_norm": 3.6378722190856934, "learning_rate": 9.114646541697617e-06, "loss": 0.8665, "step": 2810 }, { "epoch": 0.21669750231267346, "grad_norm": 3.6210851669311523, "learning_rate": 9.113937115904618e-06, "loss": 0.9098, "step": 2811 }, { "epoch": 0.2167745914276904, "grad_norm": 3.999722480773926, "learning_rate": 9.113227433628948e-06, "loss": 0.9969, "step": 2812 }, { "epoch": 0.21685168054270737, "grad_norm": 3.984334707260132, "learning_rate": 9.112517494914847e-06, "loss": 1.0185, "step": 2813 }, { "epoch": 0.21692876965772434, "grad_norm": 3.8469057083129883, "learning_rate": 9.111807299806584e-06, "loss": 1.0389, "step": 2814 }, { "epoch": 0.21700585877274128, "grad_norm": 4.206716537475586, "learning_rate": 9.111096848348429e-06, "loss": 1.0314, "step": 2815 }, { "epoch": 0.21708294788775825, "grad_norm": 4.628408908843994, "learning_rate": 9.110386140584677e-06, "loss": 1.0994, "step": 2816 }, { "epoch": 0.2171600370027752, "grad_norm": 3.524751663208008, "learning_rate": 9.109675176559639e-06, "loss": 0.9583, "step": 2817 }, { "epoch": 0.21723712611779217, "grad_norm": 3.7516028881073, "learning_rate": 9.108963956317635e-06, "loss": 1.0813, "step": 2818 }, { "epoch": 0.21731421523280914, "grad_norm": 4.167080879211426, "learning_rate": 9.108252479903012e-06, "loss": 1.0686, "step": 2819 }, { "epoch": 0.21739130434782608, "grad_norm": 4.301987648010254, "learning_rate": 9.107540747360124e-06, "loss": 1.0793, "step": 2820 }, { "epoch": 0.21746839346284305, "grad_norm": 4.356029987335205, "learning_rate": 9.106828758733344e-06, "loss": 1.0524, "step": 2821 }, { "epoch": 0.21754548257786, "grad_norm": 3.900385618209839, "learning_rate": 9.10611651406706e-06, "loss": 1.0539, "step": 2822 }, { "epoch": 0.21762257169287696, "grad_norm": 3.5353188514709473, "learning_rate": 9.105404013405677e-06, "loss": 1.0595, "step": 2823 }, { "epoch": 0.21769966080789394, "grad_norm": 3.6671149730682373, "learning_rate": 9.104691256793618e-06, "loss": 1.1179, "step": 2824 }, { "epoch": 0.21777674992291088, "grad_norm": 3.817493200302124, "learning_rate": 9.103978244275316e-06, "loss": 0.9962, "step": 2825 }, { "epoch": 0.21785383903792785, "grad_norm": 3.781205654144287, "learning_rate": 9.103264975895227e-06, 
"loss": 1.0873, "step": 2826 }, { "epoch": 0.2179309281529448, "grad_norm": 3.8384957313537598, "learning_rate": 9.102551451697816e-06, "loss": 1.0985, "step": 2827 }, { "epoch": 0.21800801726796176, "grad_norm": 3.9920754432678223, "learning_rate": 9.10183767172757e-06, "loss": 1.1307, "step": 2828 }, { "epoch": 0.21808510638297873, "grad_norm": 3.888017416000366, "learning_rate": 9.101123636028993e-06, "loss": 1.0162, "step": 2829 }, { "epoch": 0.21816219549799568, "grad_norm": 3.8230643272399902, "learning_rate": 9.100409344646593e-06, "loss": 1.0684, "step": 2830 }, { "epoch": 0.21823928461301265, "grad_norm": 3.7765209674835205, "learning_rate": 9.09969479762491e-06, "loss": 0.9862, "step": 2831 }, { "epoch": 0.2183163737280296, "grad_norm": 3.8649239540100098, "learning_rate": 9.098979995008486e-06, "loss": 1.058, "step": 2832 }, { "epoch": 0.21839346284304656, "grad_norm": 3.9235241413116455, "learning_rate": 9.098264936841891e-06, "loss": 1.0378, "step": 2833 }, { "epoch": 0.21847055195806353, "grad_norm": 4.208407878875732, "learning_rate": 9.097549623169701e-06, "loss": 1.0499, "step": 2834 }, { "epoch": 0.21854764107308047, "grad_norm": 3.320283889770508, "learning_rate": 9.096834054036516e-06, "loss": 0.9359, "step": 2835 }, { "epoch": 0.21862473018809744, "grad_norm": 4.07008695602417, "learning_rate": 9.096118229486945e-06, "loss": 1.193, "step": 2836 }, { "epoch": 0.2187018193031144, "grad_norm": 3.860478162765503, "learning_rate": 9.095402149565615e-06, "loss": 0.9158, "step": 2837 }, { "epoch": 0.21877890841813136, "grad_norm": 4.054520606994629, "learning_rate": 9.094685814317174e-06, "loss": 1.0763, "step": 2838 }, { "epoch": 0.21885599753314833, "grad_norm": 3.9474873542785645, "learning_rate": 9.093969223786277e-06, "loss": 1.0329, "step": 2839 }, { "epoch": 0.21893308664816527, "grad_norm": 3.73187255859375, "learning_rate": 9.093252378017604e-06, "loss": 1.0298, "step": 2840 }, { "epoch": 0.21901017576318224, "grad_norm": 4.097843647003174, "learning_rate": 9.092535277055845e-06, "loss": 1.1312, "step": 2841 }, { "epoch": 0.2190872648781992, "grad_norm": 3.9080405235290527, "learning_rate": 9.091817920945704e-06, "loss": 0.9503, "step": 2842 }, { "epoch": 0.21916435399321615, "grad_norm": 3.772008180618286, "learning_rate": 9.09110030973191e-06, "loss": 0.9951, "step": 2843 }, { "epoch": 0.21924144310823312, "grad_norm": 3.8021061420440674, "learning_rate": 9.090382443459201e-06, "loss": 1.068, "step": 2844 }, { "epoch": 0.21931853222325007, "grad_norm": 3.884033203125, "learning_rate": 9.089664322172331e-06, "loss": 0.9915, "step": 2845 }, { "epoch": 0.21939562133826704, "grad_norm": 3.7119226455688477, "learning_rate": 9.08894594591607e-06, "loss": 0.9613, "step": 2846 }, { "epoch": 0.219472710453284, "grad_norm": 3.494401454925537, "learning_rate": 9.088227314735208e-06, "loss": 0.912, "step": 2847 }, { "epoch": 0.21954979956830095, "grad_norm": 4.098977088928223, "learning_rate": 9.087508428674546e-06, "loss": 0.9818, "step": 2848 }, { "epoch": 0.21962688868331792, "grad_norm": 3.923501491546631, "learning_rate": 9.086789287778902e-06, "loss": 0.9673, "step": 2849 }, { "epoch": 0.21970397779833487, "grad_norm": 3.787045955657959, "learning_rate": 9.086069892093114e-06, "loss": 1.0714, "step": 2850 }, { "epoch": 0.21978106691335184, "grad_norm": 4.722983360290527, "learning_rate": 9.085350241662028e-06, "loss": 1.0963, "step": 2851 }, { "epoch": 0.2198581560283688, "grad_norm": 3.732903242111206, "learning_rate": 9.084630336530515e-06, "loss": 1.0903, "step": 2852 
}, { "epoch": 0.21993524514338575, "grad_norm": 4.350831508636475, "learning_rate": 9.083910176743455e-06, "loss": 1.1121, "step": 2853 }, { "epoch": 0.22001233425840272, "grad_norm": 3.895369291305542, "learning_rate": 9.083189762345746e-06, "loss": 0.9028, "step": 2854 }, { "epoch": 0.22008942337341966, "grad_norm": 4.156214237213135, "learning_rate": 9.082469093382303e-06, "loss": 1.0989, "step": 2855 }, { "epoch": 0.22016651248843663, "grad_norm": 3.565054416656494, "learning_rate": 9.081748169898054e-06, "loss": 1.0293, "step": 2856 }, { "epoch": 0.2202436016034536, "grad_norm": 4.331825256347656, "learning_rate": 9.08102699193795e-06, "loss": 1.1025, "step": 2857 }, { "epoch": 0.22032069071847055, "grad_norm": 5.1282734870910645, "learning_rate": 9.080305559546947e-06, "loss": 0.9675, "step": 2858 }, { "epoch": 0.22039777983348752, "grad_norm": 4.076694011688232, "learning_rate": 9.079583872770025e-06, "loss": 1.0403, "step": 2859 }, { "epoch": 0.22047486894850446, "grad_norm": 3.9768247604370117, "learning_rate": 9.078861931652178e-06, "loss": 1.0653, "step": 2860 }, { "epoch": 0.22055195806352143, "grad_norm": 3.875684976577759, "learning_rate": 9.078139736238414e-06, "loss": 0.8698, "step": 2861 }, { "epoch": 0.2206290471785384, "grad_norm": 3.808002471923828, "learning_rate": 9.077417286573759e-06, "loss": 1.0765, "step": 2862 }, { "epoch": 0.22070613629355534, "grad_norm": 3.7562291622161865, "learning_rate": 9.076694582703252e-06, "loss": 0.9728, "step": 2863 }, { "epoch": 0.22078322540857231, "grad_norm": 3.816787004470825, "learning_rate": 9.075971624671953e-06, "loss": 1.1007, "step": 2864 }, { "epoch": 0.22086031452358926, "grad_norm": 3.6228833198547363, "learning_rate": 9.075248412524932e-06, "loss": 0.9763, "step": 2865 }, { "epoch": 0.22093740363860623, "grad_norm": 3.993579864501953, "learning_rate": 9.074524946307282e-06, "loss": 0.9982, "step": 2866 }, { "epoch": 0.2210144927536232, "grad_norm": 3.9125168323516846, "learning_rate": 9.0738012260641e-06, "loss": 1.015, "step": 2867 }, { "epoch": 0.22109158186864014, "grad_norm": 3.7729082107543945, "learning_rate": 9.073077251840514e-06, "loss": 0.8864, "step": 2868 }, { "epoch": 0.2211686709836571, "grad_norm": 3.5397660732269287, "learning_rate": 9.072353023681653e-06, "loss": 0.9667, "step": 2869 }, { "epoch": 0.22124576009867405, "grad_norm": 4.714186668395996, "learning_rate": 9.071628541632675e-06, "loss": 0.8968, "step": 2870 }, { "epoch": 0.22132284921369103, "grad_norm": 4.182320594787598, "learning_rate": 9.070903805738744e-06, "loss": 1.1579, "step": 2871 }, { "epoch": 0.221399938328708, "grad_norm": 3.612896203994751, "learning_rate": 9.070178816045043e-06, "loss": 1.0278, "step": 2872 }, { "epoch": 0.22147702744372494, "grad_norm": 3.8569085597991943, "learning_rate": 9.069453572596774e-06, "loss": 1.0517, "step": 2873 }, { "epoch": 0.2215541165587419, "grad_norm": 3.7985715866088867, "learning_rate": 9.068728075439153e-06, "loss": 1.112, "step": 2874 }, { "epoch": 0.22163120567375885, "grad_norm": 3.7914130687713623, "learning_rate": 9.068002324617407e-06, "loss": 1.1309, "step": 2875 }, { "epoch": 0.22170829478877582, "grad_norm": 3.6983115673065186, "learning_rate": 9.067276320176783e-06, "loss": 1.0458, "step": 2876 }, { "epoch": 0.2217853839037928, "grad_norm": 3.6084189414978027, "learning_rate": 9.066550062162547e-06, "loss": 0.9881, "step": 2877 }, { "epoch": 0.22186247301880974, "grad_norm": 3.6229233741760254, "learning_rate": 9.065823550619976e-06, "loss": 0.9464, "step": 2878 }, { "epoch": 
0.2219395621338267, "grad_norm": 3.949673652648926, "learning_rate": 9.065096785594364e-06, "loss": 1.0257, "step": 2879 }, { "epoch": 0.22201665124884365, "grad_norm": 3.5906119346618652, "learning_rate": 9.064369767131021e-06, "loss": 0.9943, "step": 2880 }, { "epoch": 0.22209374036386062, "grad_norm": 3.8634934425354004, "learning_rate": 9.063642495275273e-06, "loss": 1.015, "step": 2881 }, { "epoch": 0.2221708294788776, "grad_norm": 4.381479740142822, "learning_rate": 9.062914970072463e-06, "loss": 1.1069, "step": 2882 }, { "epoch": 0.22224791859389453, "grad_norm": 3.475149393081665, "learning_rate": 9.062187191567947e-06, "loss": 1.0361, "step": 2883 }, { "epoch": 0.2223250077089115, "grad_norm": 3.652864694595337, "learning_rate": 9.061459159807096e-06, "loss": 0.9466, "step": 2884 }, { "epoch": 0.22240209682392847, "grad_norm": 3.7307932376861572, "learning_rate": 9.060730874835301e-06, "loss": 1.0076, "step": 2885 }, { "epoch": 0.22247918593894542, "grad_norm": 4.080811977386475, "learning_rate": 9.060002336697968e-06, "loss": 1.0577, "step": 2886 }, { "epoch": 0.2225562750539624, "grad_norm": 3.7193477153778076, "learning_rate": 9.059273545440516e-06, "loss": 1.1106, "step": 2887 }, { "epoch": 0.22263336416897933, "grad_norm": 3.971679925918579, "learning_rate": 9.058544501108383e-06, "loss": 1.0908, "step": 2888 }, { "epoch": 0.2227104532839963, "grad_norm": 3.5091354846954346, "learning_rate": 9.05781520374702e-06, "loss": 0.8429, "step": 2889 }, { "epoch": 0.22278754239901327, "grad_norm": 3.8452577590942383, "learning_rate": 9.057085653401896e-06, "loss": 1.0097, "step": 2890 }, { "epoch": 0.22286463151403021, "grad_norm": 3.8790271282196045, "learning_rate": 9.056355850118492e-06, "loss": 1.1066, "step": 2891 }, { "epoch": 0.22294172062904719, "grad_norm": 4.002335071563721, "learning_rate": 9.055625793942308e-06, "loss": 1.0349, "step": 2892 }, { "epoch": 0.22301880974406413, "grad_norm": 3.5299620628356934, "learning_rate": 9.054895484918863e-06, "loss": 0.9838, "step": 2893 }, { "epoch": 0.2230958988590811, "grad_norm": 4.129887104034424, "learning_rate": 9.054164923093684e-06, "loss": 1.1289, "step": 2894 }, { "epoch": 0.22317298797409807, "grad_norm": 3.8371896743774414, "learning_rate": 9.05343410851232e-06, "loss": 0.9709, "step": 2895 }, { "epoch": 0.223250077089115, "grad_norm": 3.9022574424743652, "learning_rate": 9.052703041220332e-06, "loss": 1.043, "step": 2896 }, { "epoch": 0.22332716620413198, "grad_norm": 3.414318323135376, "learning_rate": 9.0519717212633e-06, "loss": 1.0308, "step": 2897 }, { "epoch": 0.22340425531914893, "grad_norm": 3.9684841632843018, "learning_rate": 9.051240148686815e-06, "loss": 0.9831, "step": 2898 }, { "epoch": 0.2234813444341659, "grad_norm": 3.743062973022461, "learning_rate": 9.050508323536488e-06, "loss": 1.1446, "step": 2899 }, { "epoch": 0.22355843354918287, "grad_norm": 4.060634613037109, "learning_rate": 9.049776245857947e-06, "loss": 1.1005, "step": 2900 }, { "epoch": 0.2236355226641998, "grad_norm": 3.9923877716064453, "learning_rate": 9.049043915696831e-06, "loss": 0.9606, "step": 2901 }, { "epoch": 0.22371261177921678, "grad_norm": 3.815098285675049, "learning_rate": 9.048311333098798e-06, "loss": 1.1327, "step": 2902 }, { "epoch": 0.22378970089423372, "grad_norm": 3.9265224933624268, "learning_rate": 9.04757849810952e-06, "loss": 1.0604, "step": 2903 }, { "epoch": 0.2238667900092507, "grad_norm": 3.8014273643493652, "learning_rate": 9.046845410774685e-06, "loss": 0.9963, "step": 2904 }, { "epoch": 0.22394387912426766, 
"grad_norm": 4.092421531677246, "learning_rate": 9.04611207114e-06, "loss": 1.1604, "step": 2905 }, { "epoch": 0.2240209682392846, "grad_norm": 4.311123847961426, "learning_rate": 9.045378479251179e-06, "loss": 1.1204, "step": 2906 }, { "epoch": 0.22409805735430158, "grad_norm": 3.77968430519104, "learning_rate": 9.044644635153963e-06, "loss": 1.0621, "step": 2907 }, { "epoch": 0.22417514646931852, "grad_norm": 3.6823370456695557, "learning_rate": 9.0439105388941e-06, "loss": 0.9396, "step": 2908 }, { "epoch": 0.2242522355843355, "grad_norm": 3.8284711837768555, "learning_rate": 9.043176190517362e-06, "loss": 1.1167, "step": 2909 }, { "epoch": 0.22432932469935246, "grad_norm": 3.750458002090454, "learning_rate": 9.042441590069526e-06, "loss": 0.9483, "step": 2910 }, { "epoch": 0.2244064138143694, "grad_norm": 3.8216614723205566, "learning_rate": 9.041706737596397e-06, "loss": 1.0972, "step": 2911 }, { "epoch": 0.22448350292938637, "grad_norm": 4.093216896057129, "learning_rate": 9.040971633143782e-06, "loss": 1.2209, "step": 2912 }, { "epoch": 0.22456059204440332, "grad_norm": 3.4413156509399414, "learning_rate": 9.040236276757514e-06, "loss": 0.8431, "step": 2913 }, { "epoch": 0.2246376811594203, "grad_norm": 3.6368567943573, "learning_rate": 9.03950066848344e-06, "loss": 1.0675, "step": 2914 }, { "epoch": 0.22471477027443726, "grad_norm": 3.8164429664611816, "learning_rate": 9.038764808367422e-06, "loss": 1.1033, "step": 2915 }, { "epoch": 0.2247918593894542, "grad_norm": 3.761277198791504, "learning_rate": 9.038028696455335e-06, "loss": 1.0389, "step": 2916 }, { "epoch": 0.22486894850447117, "grad_norm": 3.7666642665863037, "learning_rate": 9.03729233279307e-06, "loss": 1.0024, "step": 2917 }, { "epoch": 0.22494603761948812, "grad_norm": 3.848273992538452, "learning_rate": 9.03655571742654e-06, "loss": 1.0972, "step": 2918 }, { "epoch": 0.22502312673450509, "grad_norm": 3.224196672439575, "learning_rate": 9.035818850401667e-06, "loss": 0.9211, "step": 2919 }, { "epoch": 0.22510021584952206, "grad_norm": 3.469214916229248, "learning_rate": 9.03508173176439e-06, "loss": 1.0197, "step": 2920 }, { "epoch": 0.225177304964539, "grad_norm": 3.8008034229278564, "learning_rate": 9.034344361560663e-06, "loss": 1.0002, "step": 2921 }, { "epoch": 0.22525439407955597, "grad_norm": 3.6283414363861084, "learning_rate": 9.033606739836463e-06, "loss": 1.0574, "step": 2922 }, { "epoch": 0.22533148319457294, "grad_norm": 4.280021667480469, "learning_rate": 9.032868866637772e-06, "loss": 1.0326, "step": 2923 }, { "epoch": 0.22540857230958988, "grad_norm": 3.6488280296325684, "learning_rate": 9.032130742010594e-06, "loss": 1.0502, "step": 2924 }, { "epoch": 0.22548566142460685, "grad_norm": 3.933264970779419, "learning_rate": 9.031392366000945e-06, "loss": 1.1104, "step": 2925 }, { "epoch": 0.2255627505396238, "grad_norm": 3.7967422008514404, "learning_rate": 9.030653738654864e-06, "loss": 1.0144, "step": 2926 }, { "epoch": 0.22563983965464077, "grad_norm": 3.846534490585327, "learning_rate": 9.029914860018399e-06, "loss": 1.0428, "step": 2927 }, { "epoch": 0.22571692876965774, "grad_norm": 3.627790689468384, "learning_rate": 9.029175730137611e-06, "loss": 0.996, "step": 2928 }, { "epoch": 0.22579401788467468, "grad_norm": 3.8956503868103027, "learning_rate": 9.028436349058584e-06, "loss": 1.1075, "step": 2929 }, { "epoch": 0.22587110699969165, "grad_norm": 4.157092571258545, "learning_rate": 9.027696716827416e-06, "loss": 1.0656, "step": 2930 }, { "epoch": 0.2259481961147086, "grad_norm": 
3.8785336017608643, "learning_rate": 9.026956833490217e-06, "loss": 0.9546, "step": 2931 }, { "epoch": 0.22602528522972556, "grad_norm": 3.8939132690429688, "learning_rate": 9.026216699093114e-06, "loss": 1.0963, "step": 2932 }, { "epoch": 0.22610237434474253, "grad_norm": 3.9576644897460938, "learning_rate": 9.025476313682253e-06, "loss": 1.0504, "step": 2933 }, { "epoch": 0.22617946345975948, "grad_norm": 3.753843069076538, "learning_rate": 9.024735677303793e-06, "loss": 0.9969, "step": 2934 }, { "epoch": 0.22625655257477645, "grad_norm": 3.669050693511963, "learning_rate": 9.023994790003908e-06, "loss": 0.9765, "step": 2935 }, { "epoch": 0.2263336416897934, "grad_norm": 3.700514078140259, "learning_rate": 9.023253651828789e-06, "loss": 1.1164, "step": 2936 }, { "epoch": 0.22641073080481036, "grad_norm": 3.998882532119751, "learning_rate": 9.022512262824642e-06, "loss": 1.0073, "step": 2937 }, { "epoch": 0.22648781991982733, "grad_norm": 4.040007591247559, "learning_rate": 9.021770623037688e-06, "loss": 1.0537, "step": 2938 }, { "epoch": 0.22656490903484428, "grad_norm": 4.1160173416137695, "learning_rate": 9.021028732514166e-06, "loss": 1.1404, "step": 2939 }, { "epoch": 0.22664199814986125, "grad_norm": 3.754762887954712, "learning_rate": 9.020286591300325e-06, "loss": 1.045, "step": 2940 }, { "epoch": 0.2267190872648782, "grad_norm": 3.404191017150879, "learning_rate": 9.019544199442438e-06, "loss": 0.9894, "step": 2941 }, { "epoch": 0.22679617637989516, "grad_norm": 3.9192099571228027, "learning_rate": 9.01880155698679e-06, "loss": 0.9169, "step": 2942 }, { "epoch": 0.22687326549491213, "grad_norm": 3.7557027339935303, "learning_rate": 9.018058663979676e-06, "loss": 1.0611, "step": 2943 }, { "epoch": 0.22695035460992907, "grad_norm": 3.523135185241699, "learning_rate": 9.017315520467416e-06, "loss": 1.019, "step": 2944 }, { "epoch": 0.22702744372494604, "grad_norm": 3.6484501361846924, "learning_rate": 9.016572126496339e-06, "loss": 0.9376, "step": 2945 }, { "epoch": 0.22710453283996299, "grad_norm": 4.25970983505249, "learning_rate": 9.015828482112793e-06, "loss": 0.9697, "step": 2946 }, { "epoch": 0.22718162195497996, "grad_norm": 3.3932673931121826, "learning_rate": 9.01508458736314e-06, "loss": 0.9625, "step": 2947 }, { "epoch": 0.22725871106999693, "grad_norm": 3.8393819332122803, "learning_rate": 9.014340442293755e-06, "loss": 1.0045, "step": 2948 }, { "epoch": 0.22733580018501387, "grad_norm": 3.575310230255127, "learning_rate": 9.013596046951037e-06, "loss": 1.0233, "step": 2949 }, { "epoch": 0.22741288930003084, "grad_norm": 3.7742843627929688, "learning_rate": 9.012851401381391e-06, "loss": 0.992, "step": 2950 }, { "epoch": 0.22748997841504778, "grad_norm": 4.013235092163086, "learning_rate": 9.012106505631244e-06, "loss": 0.931, "step": 2951 }, { "epoch": 0.22756706753006475, "grad_norm": 3.5595037937164307, "learning_rate": 9.011361359747034e-06, "loss": 1.0418, "step": 2952 }, { "epoch": 0.22764415664508172, "grad_norm": 4.25527811050415, "learning_rate": 9.01061596377522e-06, "loss": 1.0631, "step": 2953 }, { "epoch": 0.22772124576009867, "grad_norm": 4.163780689239502, "learning_rate": 9.009870317762273e-06, "loss": 1.0558, "step": 2954 }, { "epoch": 0.22779833487511564, "grad_norm": 3.863081216812134, "learning_rate": 9.009124421754676e-06, "loss": 0.8664, "step": 2955 }, { "epoch": 0.22787542399013258, "grad_norm": 3.8147034645080566, "learning_rate": 9.008378275798939e-06, "loss": 1.0287, "step": 2956 }, { "epoch": 0.22795251310514955, "grad_norm": 
4.305243968963623, "learning_rate": 9.007631879941575e-06, "loss": 1.1372, "step": 2957 }, { "epoch": 0.22802960222016652, "grad_norm": 3.846221685409546, "learning_rate": 9.006885234229119e-06, "loss": 0.9999, "step": 2958 }, { "epoch": 0.22810669133518346, "grad_norm": 3.8825647830963135, "learning_rate": 9.00613833870812e-06, "loss": 1.0099, "step": 2959 }, { "epoch": 0.22818378045020044, "grad_norm": 3.7892098426818848, "learning_rate": 9.005391193425144e-06, "loss": 0.9672, "step": 2960 }, { "epoch": 0.22826086956521738, "grad_norm": 4.183690547943115, "learning_rate": 9.004643798426772e-06, "loss": 0.9231, "step": 2961 }, { "epoch": 0.22833795868023435, "grad_norm": 3.8365590572357178, "learning_rate": 9.003896153759602e-06, "loss": 1.1283, "step": 2962 }, { "epoch": 0.22841504779525132, "grad_norm": 3.8340327739715576, "learning_rate": 9.00314825947024e-06, "loss": 0.957, "step": 2963 }, { "epoch": 0.22849213691026826, "grad_norm": 4.034872531890869, "learning_rate": 9.002400115605319e-06, "loss": 1.0195, "step": 2964 }, { "epoch": 0.22856922602528523, "grad_norm": 3.9130313396453857, "learning_rate": 9.00165172221148e-06, "loss": 1.1405, "step": 2965 }, { "epoch": 0.2286463151403022, "grad_norm": 3.705179214477539, "learning_rate": 9.00090307933538e-06, "loss": 1.0191, "step": 2966 }, { "epoch": 0.22872340425531915, "grad_norm": 3.985041618347168, "learning_rate": 9.000154187023696e-06, "loss": 1.0271, "step": 2967 }, { "epoch": 0.22880049337033612, "grad_norm": 3.9260165691375732, "learning_rate": 8.999405045323113e-06, "loss": 1.0308, "step": 2968 }, { "epoch": 0.22887758248535306, "grad_norm": 3.8115878105163574, "learning_rate": 8.99865565428034e-06, "loss": 0.9711, "step": 2969 }, { "epoch": 0.22895467160037003, "grad_norm": 3.6064395904541016, "learning_rate": 8.997906013942097e-06, "loss": 1.0217, "step": 2970 }, { "epoch": 0.229031760715387, "grad_norm": 3.8164539337158203, "learning_rate": 8.99715612435512e-06, "loss": 1.0237, "step": 2971 }, { "epoch": 0.22910884983040394, "grad_norm": 11.672921180725098, "learning_rate": 8.99640598556616e-06, "loss": 1.0373, "step": 2972 }, { "epoch": 0.22918593894542091, "grad_norm": 3.9608118534088135, "learning_rate": 8.995655597621985e-06, "loss": 1.0344, "step": 2973 }, { "epoch": 0.22926302806043786, "grad_norm": 3.9548451900482178, "learning_rate": 8.994904960569378e-06, "loss": 1.0988, "step": 2974 }, { "epoch": 0.22934011717545483, "grad_norm": 3.6159064769744873, "learning_rate": 8.994154074455135e-06, "loss": 1.0542, "step": 2975 }, { "epoch": 0.2294172062904718, "grad_norm": 3.7155611515045166, "learning_rate": 8.993402939326072e-06, "loss": 1.0449, "step": 2976 }, { "epoch": 0.22949429540548874, "grad_norm": 3.6567227840423584, "learning_rate": 8.99265155522902e-06, "loss": 0.9836, "step": 2977 }, { "epoch": 0.2295713845205057, "grad_norm": 3.778001546859741, "learning_rate": 8.991899922210821e-06, "loss": 1.0483, "step": 2978 }, { "epoch": 0.22964847363552265, "grad_norm": 4.267853260040283, "learning_rate": 8.991148040318335e-06, "loss": 0.9866, "step": 2979 }, { "epoch": 0.22972556275053962, "grad_norm": 3.8425397872924805, "learning_rate": 8.990395909598442e-06, "loss": 1.0059, "step": 2980 }, { "epoch": 0.2298026518655566, "grad_norm": 4.225255489349365, "learning_rate": 8.989643530098028e-06, "loss": 1.0807, "step": 2981 }, { "epoch": 0.22987974098057354, "grad_norm": 3.793910026550293, "learning_rate": 8.988890901864006e-06, "loss": 1.0763, "step": 2982 }, { "epoch": 0.2299568300955905, "grad_norm": 
4.9717020988464355, "learning_rate": 8.988138024943294e-06, "loss": 1.0412, "step": 2983 }, { "epoch": 0.23003391921060745, "grad_norm": 3.8835487365722656, "learning_rate": 8.98738489938283e-06, "loss": 1.0469, "step": 2984 }, { "epoch": 0.23011100832562442, "grad_norm": 3.7455644607543945, "learning_rate": 8.98663152522957e-06, "loss": 0.8876, "step": 2985 }, { "epoch": 0.2301880974406414, "grad_norm": 4.213279724121094, "learning_rate": 8.985877902530482e-06, "loss": 1.0481, "step": 2986 }, { "epoch": 0.23026518655565834, "grad_norm": 3.730454921722412, "learning_rate": 8.985124031332549e-06, "loss": 1.1566, "step": 2987 }, { "epoch": 0.2303422756706753, "grad_norm": 3.9859375953674316, "learning_rate": 8.984369911682773e-06, "loss": 1.0923, "step": 2988 }, { "epoch": 0.23041936478569225, "grad_norm": 3.603957176208496, "learning_rate": 8.983615543628167e-06, "loss": 1.0712, "step": 2989 }, { "epoch": 0.23049645390070922, "grad_norm": 4.084186553955078, "learning_rate": 8.982860927215765e-06, "loss": 1.0741, "step": 2990 }, { "epoch": 0.2305735430157262, "grad_norm": 3.7829389572143555, "learning_rate": 8.982106062492613e-06, "loss": 1.1044, "step": 2991 }, { "epoch": 0.23065063213074313, "grad_norm": 4.031806468963623, "learning_rate": 8.98135094950577e-06, "loss": 1.0782, "step": 2992 }, { "epoch": 0.2307277212457601, "grad_norm": 4.211963653564453, "learning_rate": 8.980595588302315e-06, "loss": 1.0644, "step": 2993 }, { "epoch": 0.23080481036077705, "grad_norm": 3.739516258239746, "learning_rate": 8.979839978929342e-06, "loss": 1.0677, "step": 2994 }, { "epoch": 0.23088189947579402, "grad_norm": 3.528867483139038, "learning_rate": 8.979084121433958e-06, "loss": 0.9519, "step": 2995 }, { "epoch": 0.230958988590811, "grad_norm": 3.507763147354126, "learning_rate": 8.978328015863287e-06, "loss": 0.9619, "step": 2996 }, { "epoch": 0.23103607770582793, "grad_norm": 4.500532150268555, "learning_rate": 8.977571662264471e-06, "loss": 1.2321, "step": 2997 }, { "epoch": 0.2311131668208449, "grad_norm": 3.3748843669891357, "learning_rate": 8.976815060684659e-06, "loss": 0.999, "step": 2998 }, { "epoch": 0.23119025593586184, "grad_norm": 3.790412425994873, "learning_rate": 8.976058211171027e-06, "loss": 1.1291, "step": 2999 }, { "epoch": 0.23126734505087881, "grad_norm": 3.6807661056518555, "learning_rate": 8.975301113770756e-06, "loss": 0.9742, "step": 3000 }, { "epoch": 0.23134443416589578, "grad_norm": 3.902876138687134, "learning_rate": 8.97454376853105e-06, "loss": 1.1042, "step": 3001 }, { "epoch": 0.23142152328091273, "grad_norm": 3.766279935836792, "learning_rate": 8.973786175499123e-06, "loss": 1.112, "step": 3002 }, { "epoch": 0.2314986123959297, "grad_norm": 3.676820993423462, "learning_rate": 8.973028334722212e-06, "loss": 0.9986, "step": 3003 }, { "epoch": 0.23157570151094664, "grad_norm": 3.577911138534546, "learning_rate": 8.972270246247558e-06, "loss": 0.9868, "step": 3004 }, { "epoch": 0.2316527906259636, "grad_norm": 3.7404353618621826, "learning_rate": 8.97151191012243e-06, "loss": 1.1051, "step": 3005 }, { "epoch": 0.23172987974098058, "grad_norm": 3.7032761573791504, "learning_rate": 8.9707533263941e-06, "loss": 1.0054, "step": 3006 }, { "epoch": 0.23180696885599752, "grad_norm": 3.8245747089385986, "learning_rate": 8.969994495109868e-06, "loss": 1.0588, "step": 3007 }, { "epoch": 0.2318840579710145, "grad_norm": 3.9548802375793457, "learning_rate": 8.96923541631704e-06, "loss": 1.1028, "step": 3008 }, { "epoch": 0.23196114708603147, "grad_norm": 3.6920297145843506, 
"learning_rate": 8.968476090062941e-06, "loss": 0.9935, "step": 3009 }, { "epoch": 0.2320382362010484, "grad_norm": 3.5321335792541504, "learning_rate": 8.967716516394911e-06, "loss": 0.9922, "step": 3010 }, { "epoch": 0.23211532531606538, "grad_norm": 3.3093225955963135, "learning_rate": 8.966956695360304e-06, "loss": 0.9767, "step": 3011 }, { "epoch": 0.23219241443108232, "grad_norm": 3.7494637966156006, "learning_rate": 8.966196627006493e-06, "loss": 1.068, "step": 3012 }, { "epoch": 0.2322695035460993, "grad_norm": 3.80411696434021, "learning_rate": 8.965436311380866e-06, "loss": 1.0047, "step": 3013 }, { "epoch": 0.23234659266111626, "grad_norm": 3.954469919204712, "learning_rate": 8.96467574853082e-06, "loss": 1.0272, "step": 3014 }, { "epoch": 0.2324236817761332, "grad_norm": 4.207308292388916, "learning_rate": 8.963914938503777e-06, "loss": 1.0145, "step": 3015 }, { "epoch": 0.23250077089115018, "grad_norm": 3.938061237335205, "learning_rate": 8.963153881347164e-06, "loss": 0.9881, "step": 3016 }, { "epoch": 0.23257786000616712, "grad_norm": 3.8759334087371826, "learning_rate": 8.962392577108433e-06, "loss": 1.0464, "step": 3017 }, { "epoch": 0.2326549491211841, "grad_norm": 4.220122814178467, "learning_rate": 8.96163102583505e-06, "loss": 0.9949, "step": 3018 }, { "epoch": 0.23273203823620106, "grad_norm": 3.3575594425201416, "learning_rate": 8.960869227574486e-06, "loss": 1.0295, "step": 3019 }, { "epoch": 0.232809127351218, "grad_norm": 3.665529489517212, "learning_rate": 8.960107182374242e-06, "loss": 1.1279, "step": 3020 }, { "epoch": 0.23288621646623497, "grad_norm": 4.267427921295166, "learning_rate": 8.959344890281826e-06, "loss": 1.0118, "step": 3021 }, { "epoch": 0.23296330558125192, "grad_norm": 4.257380962371826, "learning_rate": 8.958582351344759e-06, "loss": 1.0997, "step": 3022 }, { "epoch": 0.2330403946962689, "grad_norm": 4.008502006530762, "learning_rate": 8.957819565610585e-06, "loss": 1.0658, "step": 3023 }, { "epoch": 0.23311748381128586, "grad_norm": 3.9444122314453125, "learning_rate": 8.957056533126861e-06, "loss": 1.0731, "step": 3024 }, { "epoch": 0.2331945729263028, "grad_norm": 4.073714256286621, "learning_rate": 8.956293253941155e-06, "loss": 1.0287, "step": 3025 }, { "epoch": 0.23327166204131977, "grad_norm": 4.01447057723999, "learning_rate": 8.955529728101055e-06, "loss": 1.1299, "step": 3026 }, { "epoch": 0.23334875115633671, "grad_norm": 4.0629706382751465, "learning_rate": 8.954765955654164e-06, "loss": 1.0278, "step": 3027 }, { "epoch": 0.23342584027135369, "grad_norm": 3.837249279022217, "learning_rate": 8.954001936648095e-06, "loss": 1.0456, "step": 3028 }, { "epoch": 0.23350292938637066, "grad_norm": 3.798231363296509, "learning_rate": 8.953237671130486e-06, "loss": 0.9937, "step": 3029 }, { "epoch": 0.2335800185013876, "grad_norm": 3.616384267807007, "learning_rate": 8.952473159148982e-06, "loss": 0.9586, "step": 3030 }, { "epoch": 0.23365710761640457, "grad_norm": 3.4887986183166504, "learning_rate": 8.951708400751246e-06, "loss": 0.997, "step": 3031 }, { "epoch": 0.2337341967314215, "grad_norm": 3.35778546333313, "learning_rate": 8.950943395984959e-06, "loss": 0.922, "step": 3032 }, { "epoch": 0.23381128584643848, "grad_norm": 4.111606121063232, "learning_rate": 8.950178144897814e-06, "loss": 1.1236, "step": 3033 }, { "epoch": 0.23388837496145545, "grad_norm": 3.4538538455963135, "learning_rate": 8.949412647537519e-06, "loss": 0.8232, "step": 3034 }, { "epoch": 0.2339654640764724, "grad_norm": 3.7339892387390137, "learning_rate": 
8.948646903951801e-06, "loss": 0.9423, "step": 3035 }, { "epoch": 0.23404255319148937, "grad_norm": 4.029750347137451, "learning_rate": 8.947880914188397e-06, "loss": 1.1581, "step": 3036 }, { "epoch": 0.2341196423065063, "grad_norm": 3.9381988048553467, "learning_rate": 8.947114678295066e-06, "loss": 1.0831, "step": 3037 }, { "epoch": 0.23419673142152328, "grad_norm": 3.86995530128479, "learning_rate": 8.946348196319578e-06, "loss": 1.0168, "step": 3038 }, { "epoch": 0.23427382053654025, "grad_norm": 3.5996012687683105, "learning_rate": 8.945581468309718e-06, "loss": 0.9506, "step": 3039 }, { "epoch": 0.2343509096515572, "grad_norm": 4.0110368728637695, "learning_rate": 8.94481449431329e-06, "loss": 1.0381, "step": 3040 }, { "epoch": 0.23442799876657416, "grad_norm": 4.450444221496582, "learning_rate": 8.944047274378105e-06, "loss": 1.0377, "step": 3041 }, { "epoch": 0.2345050878815911, "grad_norm": 4.031339645385742, "learning_rate": 8.943279808552e-06, "loss": 1.059, "step": 3042 }, { "epoch": 0.23458217699660808, "grad_norm": 3.6864678859710693, "learning_rate": 8.942512096882825e-06, "loss": 1.034, "step": 3043 }, { "epoch": 0.23465926611162505, "grad_norm": 4.179306507110596, "learning_rate": 8.941744139418438e-06, "loss": 1.007, "step": 3044 }, { "epoch": 0.234736355226642, "grad_norm": 3.602816104888916, "learning_rate": 8.940975936206717e-06, "loss": 0.9409, "step": 3045 }, { "epoch": 0.23481344434165896, "grad_norm": 3.6022086143493652, "learning_rate": 8.94020748729556e-06, "loss": 1.0962, "step": 3046 }, { "epoch": 0.2348905334566759, "grad_norm": 3.6227774620056152, "learning_rate": 8.93943879273287e-06, "loss": 1.0189, "step": 3047 }, { "epoch": 0.23496762257169287, "grad_norm": 3.8395297527313232, "learning_rate": 8.938669852566578e-06, "loss": 0.9859, "step": 3048 }, { "epoch": 0.23504471168670985, "grad_norm": 3.932426929473877, "learning_rate": 8.937900666844616e-06, "loss": 1.0921, "step": 3049 }, { "epoch": 0.2351218008017268, "grad_norm": 3.409926414489746, "learning_rate": 8.937131235614945e-06, "loss": 0.9903, "step": 3050 }, { "epoch": 0.23519888991674376, "grad_norm": 3.986931800842285, "learning_rate": 8.93636155892553e-06, "loss": 1.0701, "step": 3051 }, { "epoch": 0.23527597903176073, "grad_norm": 3.698172092437744, "learning_rate": 8.935591636824359e-06, "loss": 1.0048, "step": 3052 }, { "epoch": 0.23535306814677767, "grad_norm": 3.6447818279266357, "learning_rate": 8.934821469359434e-06, "loss": 1.0963, "step": 3053 }, { "epoch": 0.23543015726179464, "grad_norm": 3.609434127807617, "learning_rate": 8.934051056578768e-06, "loss": 0.9999, "step": 3054 }, { "epoch": 0.23550724637681159, "grad_norm": 3.5842690467834473, "learning_rate": 8.933280398530394e-06, "loss": 1.1416, "step": 3055 }, { "epoch": 0.23558433549182856, "grad_norm": 3.902055501937866, "learning_rate": 8.932509495262357e-06, "loss": 0.9909, "step": 3056 }, { "epoch": 0.23566142460684553, "grad_norm": 4.051197528839111, "learning_rate": 8.931738346822723e-06, "loss": 1.0003, "step": 3057 }, { "epoch": 0.23573851372186247, "grad_norm": 3.764064311981201, "learning_rate": 8.930966953259563e-06, "loss": 1.0474, "step": 3058 }, { "epoch": 0.23581560283687944, "grad_norm": 3.6911094188690186, "learning_rate": 8.930195314620975e-06, "loss": 1.0557, "step": 3059 }, { "epoch": 0.23589269195189638, "grad_norm": 3.983623743057251, "learning_rate": 8.929423430955062e-06, "loss": 1.0087, "step": 3060 }, { "epoch": 0.23596978106691335, "grad_norm": 3.7596616744995117, "learning_rate": 8.92865130230995e-06, 
"loss": 1.0966, "step": 3061 }, { "epoch": 0.23604687018193032, "grad_norm": 3.731916904449463, "learning_rate": 8.927878928733777e-06, "loss": 1.0216, "step": 3062 }, { "epoch": 0.23612395929694727, "grad_norm": 3.540071487426758, "learning_rate": 8.927106310274695e-06, "loss": 0.9634, "step": 3063 }, { "epoch": 0.23620104841196424, "grad_norm": 3.379556655883789, "learning_rate": 8.926333446980873e-06, "loss": 0.9989, "step": 3064 }, { "epoch": 0.23627813752698118, "grad_norm": 3.504272937774658, "learning_rate": 8.925560338900496e-06, "loss": 0.9902, "step": 3065 }, { "epoch": 0.23635522664199815, "grad_norm": 3.81402325630188, "learning_rate": 8.924786986081764e-06, "loss": 1.0649, "step": 3066 }, { "epoch": 0.23643231575701512, "grad_norm": 3.7274930477142334, "learning_rate": 8.92401338857289e-06, "loss": 1.0921, "step": 3067 }, { "epoch": 0.23650940487203206, "grad_norm": 3.597395658493042, "learning_rate": 8.923239546422104e-06, "loss": 0.9825, "step": 3068 }, { "epoch": 0.23658649398704903, "grad_norm": 3.9207396507263184, "learning_rate": 8.922465459677649e-06, "loss": 0.974, "step": 3069 }, { "epoch": 0.23666358310206598, "grad_norm": 3.931241035461426, "learning_rate": 8.92169112838779e-06, "loss": 1.0103, "step": 3070 }, { "epoch": 0.23674067221708295, "grad_norm": 4.226929187774658, "learning_rate": 8.9209165526008e-06, "loss": 1.0043, "step": 3071 }, { "epoch": 0.23681776133209992, "grad_norm": 3.8953144550323486, "learning_rate": 8.920141732364971e-06, "loss": 1.0027, "step": 3072 }, { "epoch": 0.23689485044711686, "grad_norm": 3.6181445121765137, "learning_rate": 8.919366667728607e-06, "loss": 1.0596, "step": 3073 }, { "epoch": 0.23697193956213383, "grad_norm": 4.111177921295166, "learning_rate": 8.918591358740027e-06, "loss": 1.0282, "step": 3074 }, { "epoch": 0.23704902867715077, "grad_norm": 3.385059118270874, "learning_rate": 8.917815805447574e-06, "loss": 0.9483, "step": 3075 }, { "epoch": 0.23712611779216775, "grad_norm": 3.6011502742767334, "learning_rate": 8.917040007899596e-06, "loss": 1.0622, "step": 3076 }, { "epoch": 0.23720320690718472, "grad_norm": 3.875084161758423, "learning_rate": 8.91626396614446e-06, "loss": 0.9209, "step": 3077 }, { "epoch": 0.23728029602220166, "grad_norm": 3.934360980987549, "learning_rate": 8.91548768023055e-06, "loss": 1.0676, "step": 3078 }, { "epoch": 0.23735738513721863, "grad_norm": 3.5961403846740723, "learning_rate": 8.91471115020626e-06, "loss": 0.9208, "step": 3079 }, { "epoch": 0.23743447425223557, "grad_norm": 3.771339178085327, "learning_rate": 8.913934376120006e-06, "loss": 1.0828, "step": 3080 }, { "epoch": 0.23751156336725254, "grad_norm": 3.8882675170898438, "learning_rate": 8.913157358020212e-06, "loss": 1.0836, "step": 3081 }, { "epoch": 0.2375886524822695, "grad_norm": 4.413726329803467, "learning_rate": 8.912380095955326e-06, "loss": 1.0002, "step": 3082 }, { "epoch": 0.23766574159728646, "grad_norm": 4.3546953201293945, "learning_rate": 8.911602589973803e-06, "loss": 1.0706, "step": 3083 }, { "epoch": 0.23774283071230343, "grad_norm": 4.231818199157715, "learning_rate": 8.910824840124116e-06, "loss": 1.104, "step": 3084 }, { "epoch": 0.23781991982732037, "grad_norm": 3.49495267868042, "learning_rate": 8.910046846454756e-06, "loss": 0.9728, "step": 3085 }, { "epoch": 0.23789700894233734, "grad_norm": 3.724919080734253, "learning_rate": 8.909268609014228e-06, "loss": 1.0903, "step": 3086 }, { "epoch": 0.2379740980573543, "grad_norm": 4.132431983947754, "learning_rate": 8.908490127851047e-06, "loss": 1.0781, "step": 
3087 }, { "epoch": 0.23805118717237125, "grad_norm": 3.652280330657959, "learning_rate": 8.907711403013748e-06, "loss": 1.0157, "step": 3088 }, { "epoch": 0.23812827628738822, "grad_norm": 3.8804502487182617, "learning_rate": 8.90693243455088e-06, "loss": 1.0803, "step": 3089 }, { "epoch": 0.23820536540240517, "grad_norm": 3.9575884342193604, "learning_rate": 8.906153222511014e-06, "loss": 1.0079, "step": 3090 }, { "epoch": 0.23828245451742214, "grad_norm": 3.4292240142822266, "learning_rate": 8.905373766942722e-06, "loss": 1.0367, "step": 3091 }, { "epoch": 0.2383595436324391, "grad_norm": 3.641467332839966, "learning_rate": 8.904594067894603e-06, "loss": 1.0492, "step": 3092 }, { "epoch": 0.23843663274745605, "grad_norm": 3.656708240509033, "learning_rate": 8.903814125415267e-06, "loss": 1.0476, "step": 3093 }, { "epoch": 0.23851372186247302, "grad_norm": 3.6211674213409424, "learning_rate": 8.903033939553336e-06, "loss": 1.0771, "step": 3094 }, { "epoch": 0.23859081097749, "grad_norm": 3.7082033157348633, "learning_rate": 8.902253510357455e-06, "loss": 1.0533, "step": 3095 }, { "epoch": 0.23866790009250693, "grad_norm": 3.7945261001586914, "learning_rate": 8.90147283787628e-06, "loss": 0.9977, "step": 3096 }, { "epoch": 0.2387449892075239, "grad_norm": 3.3629956245422363, "learning_rate": 8.900691922158476e-06, "loss": 0.8274, "step": 3097 }, { "epoch": 0.23882207832254085, "grad_norm": 3.971607208251953, "learning_rate": 8.899910763252735e-06, "loss": 1.1203, "step": 3098 }, { "epoch": 0.23889916743755782, "grad_norm": 3.6717000007629395, "learning_rate": 8.899129361207754e-06, "loss": 0.9675, "step": 3099 }, { "epoch": 0.2389762565525748, "grad_norm": 3.761371374130249, "learning_rate": 8.898347716072254e-06, "loss": 0.9512, "step": 3100 }, { "epoch": 0.23905334566759173, "grad_norm": 4.109459400177002, "learning_rate": 8.897565827894963e-06, "loss": 1.1068, "step": 3101 }, { "epoch": 0.2391304347826087, "grad_norm": 3.7636146545410156, "learning_rate": 8.89678369672463e-06, "loss": 0.9598, "step": 3102 }, { "epoch": 0.23920752389762565, "grad_norm": 3.6051504611968994, "learning_rate": 8.896001322610013e-06, "loss": 1.0457, "step": 3103 }, { "epoch": 0.23928461301264262, "grad_norm": 3.4127960205078125, "learning_rate": 8.895218705599894e-06, "loss": 0.8803, "step": 3104 }, { "epoch": 0.2393617021276596, "grad_norm": 3.451889991760254, "learning_rate": 8.89443584574306e-06, "loss": 0.8208, "step": 3105 }, { "epoch": 0.23943879124267653, "grad_norm": 3.950897455215454, "learning_rate": 8.893652743088321e-06, "loss": 1.0732, "step": 3106 }, { "epoch": 0.2395158803576935, "grad_norm": 3.8806371688842773, "learning_rate": 8.8928693976845e-06, "loss": 1.0772, "step": 3107 }, { "epoch": 0.23959296947271044, "grad_norm": 3.3952512741088867, "learning_rate": 8.892085809580435e-06, "loss": 0.998, "step": 3108 }, { "epoch": 0.2396700585877274, "grad_norm": 4.141806602478027, "learning_rate": 8.891301978824975e-06, "loss": 1.0228, "step": 3109 }, { "epoch": 0.23974714770274438, "grad_norm": 4.028316974639893, "learning_rate": 8.890517905466991e-06, "loss": 0.9701, "step": 3110 }, { "epoch": 0.23982423681776133, "grad_norm": 3.646756172180176, "learning_rate": 8.889733589555368e-06, "loss": 1.0763, "step": 3111 }, { "epoch": 0.2399013259327783, "grad_norm": 3.909493923187256, "learning_rate": 8.888949031138997e-06, "loss": 1.0528, "step": 3112 }, { "epoch": 0.23997841504779524, "grad_norm": 3.6650424003601074, "learning_rate": 8.888164230266798e-06, "loss": 1.015, "step": 3113 }, { "epoch": 
0.2400555041628122, "grad_norm": 3.6798250675201416, "learning_rate": 8.887379186987695e-06, "loss": 0.9954, "step": 3114 }, { "epoch": 0.24013259327782918, "grad_norm": 4.122318744659424, "learning_rate": 8.886593901350632e-06, "loss": 1.1222, "step": 3115 }, { "epoch": 0.24020968239284612, "grad_norm": 3.966783046722412, "learning_rate": 8.885808373404572e-06, "loss": 0.9446, "step": 3116 }, { "epoch": 0.2402867715078631, "grad_norm": 3.8579766750335693, "learning_rate": 8.885022603198481e-06, "loss": 1.0659, "step": 3117 }, { "epoch": 0.24036386062288004, "grad_norm": 3.9546401500701904, "learning_rate": 8.884236590781354e-06, "loss": 1.054, "step": 3118 }, { "epoch": 0.240440949737897, "grad_norm": 3.887936592102051, "learning_rate": 8.883450336202194e-06, "loss": 1.0296, "step": 3119 }, { "epoch": 0.24051803885291398, "grad_norm": 3.620002031326294, "learning_rate": 8.882663839510017e-06, "loss": 0.8848, "step": 3120 }, { "epoch": 0.24059512796793092, "grad_norm": 4.082600116729736, "learning_rate": 8.881877100753857e-06, "loss": 1.0739, "step": 3121 }, { "epoch": 0.2406722170829479, "grad_norm": 4.3715500831604, "learning_rate": 8.881090119982766e-06, "loss": 1.1382, "step": 3122 }, { "epoch": 0.24074930619796484, "grad_norm": 3.6317408084869385, "learning_rate": 8.880302897245805e-06, "loss": 1.0448, "step": 3123 }, { "epoch": 0.2408263953129818, "grad_norm": 3.779918909072876, "learning_rate": 8.879515432592057e-06, "loss": 1.0362, "step": 3124 }, { "epoch": 0.24090348442799878, "grad_norm": 3.709547519683838, "learning_rate": 8.878727726070614e-06, "loss": 0.8987, "step": 3125 }, { "epoch": 0.24098057354301572, "grad_norm": 3.8204429149627686, "learning_rate": 8.877939777730585e-06, "loss": 1.0463, "step": 3126 }, { "epoch": 0.2410576626580327, "grad_norm": 3.505138874053955, "learning_rate": 8.877151587621096e-06, "loss": 1.0174, "step": 3127 }, { "epoch": 0.24113475177304963, "grad_norm": 3.879589557647705, "learning_rate": 8.876363155791286e-06, "loss": 1.0325, "step": 3128 }, { "epoch": 0.2412118408880666, "grad_norm": 3.7704927921295166, "learning_rate": 8.875574482290308e-06, "loss": 1.0305, "step": 3129 }, { "epoch": 0.24128893000308357, "grad_norm": 3.636652946472168, "learning_rate": 8.874785567167335e-06, "loss": 0.9645, "step": 3130 }, { "epoch": 0.24136601911810052, "grad_norm": 3.62375545501709, "learning_rate": 8.873996410471549e-06, "loss": 1.071, "step": 3131 }, { "epoch": 0.2414431082331175, "grad_norm": 3.3911542892456055, "learning_rate": 8.87320701225215e-06, "loss": 1.0456, "step": 3132 }, { "epoch": 0.24152019734813443, "grad_norm": 3.846412181854248, "learning_rate": 8.872417372558353e-06, "loss": 0.9716, "step": 3133 }, { "epoch": 0.2415972864631514, "grad_norm": 3.8464903831481934, "learning_rate": 8.871627491439389e-06, "loss": 0.9607, "step": 3134 }, { "epoch": 0.24167437557816837, "grad_norm": 3.595994234085083, "learning_rate": 8.870837368944505e-06, "loss": 1.0328, "step": 3135 }, { "epoch": 0.24175146469318531, "grad_norm": 3.919416904449463, "learning_rate": 8.870047005122957e-06, "loss": 1.1371, "step": 3136 }, { "epoch": 0.24182855380820228, "grad_norm": 4.039884567260742, "learning_rate": 8.869256400024023e-06, "loss": 1.0813, "step": 3137 }, { "epoch": 0.24190564292321926, "grad_norm": 3.456634044647217, "learning_rate": 8.86846555369699e-06, "loss": 1.0678, "step": 3138 }, { "epoch": 0.2419827320382362, "grad_norm": 3.9659950733184814, "learning_rate": 8.867674466191164e-06, "loss": 1.0229, "step": 3139 }, { "epoch": 0.24205982115325317, 
"grad_norm": 3.6779067516326904, "learning_rate": 8.866883137555869e-06, "loss": 0.9999, "step": 3140 }, { "epoch": 0.2421369102682701, "grad_norm": 3.332307815551758, "learning_rate": 8.866091567840436e-06, "loss": 1.0288, "step": 3141 }, { "epoch": 0.24221399938328708, "grad_norm": 3.756145477294922, "learning_rate": 8.865299757094217e-06, "loss": 1.0806, "step": 3142 }, { "epoch": 0.24229108849830405, "grad_norm": 3.585340976715088, "learning_rate": 8.864507705366577e-06, "loss": 0.9627, "step": 3143 }, { "epoch": 0.242368177613321, "grad_norm": 3.905465841293335, "learning_rate": 8.863715412706897e-06, "loss": 1.0693, "step": 3144 }, { "epoch": 0.24244526672833797, "grad_norm": 4.029237270355225, "learning_rate": 8.862922879164571e-06, "loss": 1.0433, "step": 3145 }, { "epoch": 0.2425223558433549, "grad_norm": 3.680603265762329, "learning_rate": 8.86213010478901e-06, "loss": 0.9543, "step": 3146 }, { "epoch": 0.24259944495837188, "grad_norm": 3.9981207847595215, "learning_rate": 8.86133708962964e-06, "loss": 0.9039, "step": 3147 }, { "epoch": 0.24267653407338885, "grad_norm": 4.111728191375732, "learning_rate": 8.860543833735902e-06, "loss": 0.9712, "step": 3148 }, { "epoch": 0.2427536231884058, "grad_norm": 4.376039028167725, "learning_rate": 8.859750337157247e-06, "loss": 0.9934, "step": 3149 }, { "epoch": 0.24283071230342276, "grad_norm": 3.6740853786468506, "learning_rate": 8.858956599943151e-06, "loss": 1.117, "step": 3150 }, { "epoch": 0.2429078014184397, "grad_norm": 3.9488065242767334, "learning_rate": 8.858162622143096e-06, "loss": 0.9975, "step": 3151 }, { "epoch": 0.24298489053345668, "grad_norm": 3.6119964122772217, "learning_rate": 8.857368403806586e-06, "loss": 1.0049, "step": 3152 }, { "epoch": 0.24306197964847365, "grad_norm": 3.641690731048584, "learning_rate": 8.856573944983131e-06, "loss": 1.03, "step": 3153 }, { "epoch": 0.2431390687634906, "grad_norm": 4.013689041137695, "learning_rate": 8.855779245722264e-06, "loss": 1.0664, "step": 3154 }, { "epoch": 0.24321615787850756, "grad_norm": 3.818681478500366, "learning_rate": 8.854984306073533e-06, "loss": 0.9579, "step": 3155 }, { "epoch": 0.2432932469935245, "grad_norm": 4.519876003265381, "learning_rate": 8.854189126086494e-06, "loss": 1.01, "step": 3156 }, { "epoch": 0.24337033610854147, "grad_norm": 4.186661243438721, "learning_rate": 8.853393705810725e-06, "loss": 0.9834, "step": 3157 }, { "epoch": 0.24344742522355844, "grad_norm": 3.6581454277038574, "learning_rate": 8.852598045295815e-06, "loss": 1.0094, "step": 3158 }, { "epoch": 0.2435245143385754, "grad_norm": 3.646156072616577, "learning_rate": 8.851802144591371e-06, "loss": 1.016, "step": 3159 }, { "epoch": 0.24360160345359236, "grad_norm": 3.637533187866211, "learning_rate": 8.851006003747013e-06, "loss": 1.0252, "step": 3160 }, { "epoch": 0.2436786925686093, "grad_norm": 3.3992221355438232, "learning_rate": 8.850209622812374e-06, "loss": 0.9184, "step": 3161 }, { "epoch": 0.24375578168362627, "grad_norm": 3.7890117168426514, "learning_rate": 8.849413001837105e-06, "loss": 1.0386, "step": 3162 }, { "epoch": 0.24383287079864324, "grad_norm": 3.5062248706817627, "learning_rate": 8.848616140870875e-06, "loss": 1.0384, "step": 3163 }, { "epoch": 0.24390995991366018, "grad_norm": 3.830472469329834, "learning_rate": 8.847819039963359e-06, "loss": 1.0629, "step": 3164 }, { "epoch": 0.24398704902867716, "grad_norm": 3.8684401512145996, "learning_rate": 8.847021699164255e-06, "loss": 1.0184, "step": 3165 }, { "epoch": 0.2440641381436941, "grad_norm": 
3.6413917541503906, "learning_rate": 8.846224118523271e-06, "loss": 1.0626, "step": 3166 }, { "epoch": 0.24414122725871107, "grad_norm": 3.6060991287231445, "learning_rate": 8.845426298090133e-06, "loss": 1.1064, "step": 3167 }, { "epoch": 0.24421831637372804, "grad_norm": 3.7429842948913574, "learning_rate": 8.844628237914584e-06, "loss": 0.9361, "step": 3168 }, { "epoch": 0.24429540548874498, "grad_norm": 3.62557053565979, "learning_rate": 8.843829938046376e-06, "loss": 0.9631, "step": 3169 }, { "epoch": 0.24437249460376195, "grad_norm": 3.561077833175659, "learning_rate": 8.843031398535276e-06, "loss": 1.0517, "step": 3170 }, { "epoch": 0.2444495837187789, "grad_norm": 3.694340467453003, "learning_rate": 8.842232619431075e-06, "loss": 0.9603, "step": 3171 }, { "epoch": 0.24452667283379587, "grad_norm": 3.8666601181030273, "learning_rate": 8.841433600783568e-06, "loss": 1.0499, "step": 3172 }, { "epoch": 0.24460376194881284, "grad_norm": 3.9767086505889893, "learning_rate": 8.840634342642572e-06, "loss": 1.0151, "step": 3173 }, { "epoch": 0.24468085106382978, "grad_norm": 3.6622021198272705, "learning_rate": 8.839834845057915e-06, "loss": 0.9359, "step": 3174 }, { "epoch": 0.24475794017884675, "grad_norm": 4.173803329467773, "learning_rate": 8.839035108079443e-06, "loss": 1.0276, "step": 3175 }, { "epoch": 0.2448350292938637, "grad_norm": 3.558023691177368, "learning_rate": 8.838235131757014e-06, "loss": 0.9519, "step": 3176 }, { "epoch": 0.24491211840888066, "grad_norm": 3.633117914199829, "learning_rate": 8.837434916140504e-06, "loss": 0.9188, "step": 3177 }, { "epoch": 0.24498920752389763, "grad_norm": 3.513375997543335, "learning_rate": 8.8366344612798e-06, "loss": 1.0165, "step": 3178 }, { "epoch": 0.24506629663891458, "grad_norm": 3.9629836082458496, "learning_rate": 8.835833767224811e-06, "loss": 1.0958, "step": 3179 }, { "epoch": 0.24514338575393155, "grad_norm": 3.611243724822998, "learning_rate": 8.83503283402545e-06, "loss": 1.1016, "step": 3180 }, { "epoch": 0.24522047486894852, "grad_norm": 3.9855403900146484, "learning_rate": 8.834231661731656e-06, "loss": 1.1526, "step": 3181 }, { "epoch": 0.24529756398396546, "grad_norm": 3.8271896839141846, "learning_rate": 8.833430250393376e-06, "loss": 0.8604, "step": 3182 }, { "epoch": 0.24537465309898243, "grad_norm": 4.085109710693359, "learning_rate": 8.832628600060572e-06, "loss": 1.0036, "step": 3183 }, { "epoch": 0.24545174221399937, "grad_norm": 3.691535711288452, "learning_rate": 8.831826710783226e-06, "loss": 1.1316, "step": 3184 }, { "epoch": 0.24552883132901634, "grad_norm": 3.610386371612549, "learning_rate": 8.83102458261133e-06, "loss": 1.0648, "step": 3185 }, { "epoch": 0.24560592044403332, "grad_norm": 3.5450527667999268, "learning_rate": 8.83022221559489e-06, "loss": 1.0361, "step": 3186 }, { "epoch": 0.24568300955905026, "grad_norm": 3.520220994949341, "learning_rate": 8.829419609783936e-06, "loss": 0.8683, "step": 3187 }, { "epoch": 0.24576009867406723, "grad_norm": 3.9032230377197266, "learning_rate": 8.8286167652285e-06, "loss": 1.0631, "step": 3188 }, { "epoch": 0.24583718778908417, "grad_norm": 3.8549489974975586, "learning_rate": 8.82781368197864e-06, "loss": 1.0082, "step": 3189 }, { "epoch": 0.24591427690410114, "grad_norm": 3.810291051864624, "learning_rate": 8.827010360084419e-06, "loss": 0.9334, "step": 3190 }, { "epoch": 0.2459913660191181, "grad_norm": 3.9244349002838135, "learning_rate": 8.826206799595922e-06, "loss": 1.0369, "step": 3191 }, { "epoch": 0.24606845513413506, "grad_norm": 
3.6987247467041016, "learning_rate": 8.825403000563248e-06, "loss": 0.963, "step": 3192 }, { "epoch": 0.24614554424915203, "grad_norm": 3.6557397842407227, "learning_rate": 8.824598963036511e-06, "loss": 0.9969, "step": 3193 }, { "epoch": 0.24622263336416897, "grad_norm": 3.776524066925049, "learning_rate": 8.823794687065836e-06, "loss": 1.0117, "step": 3194 }, { "epoch": 0.24629972247918594, "grad_norm": 3.713355779647827, "learning_rate": 8.822990172701367e-06, "loss": 0.9675, "step": 3195 }, { "epoch": 0.2463768115942029, "grad_norm": 3.96952748298645, "learning_rate": 8.82218541999326e-06, "loss": 0.9109, "step": 3196 }, { "epoch": 0.24645390070921985, "grad_norm": 3.8969883918762207, "learning_rate": 8.821380428991686e-06, "loss": 1.07, "step": 3197 }, { "epoch": 0.24653098982423682, "grad_norm": 3.5697784423828125, "learning_rate": 8.820575199746835e-06, "loss": 0.9745, "step": 3198 }, { "epoch": 0.24660807893925377, "grad_norm": 3.8984580039978027, "learning_rate": 8.819769732308908e-06, "loss": 1.0264, "step": 3199 }, { "epoch": 0.24668516805427074, "grad_norm": 4.095597267150879, "learning_rate": 8.818964026728121e-06, "loss": 1.1447, "step": 3200 }, { "epoch": 0.2467622571692877, "grad_norm": 3.7202227115631104, "learning_rate": 8.818158083054706e-06, "loss": 0.9318, "step": 3201 }, { "epoch": 0.24683934628430465, "grad_norm": 3.9623115062713623, "learning_rate": 8.817351901338908e-06, "loss": 1.1674, "step": 3202 }, { "epoch": 0.24691643539932162, "grad_norm": 4.0379862785339355, "learning_rate": 8.816545481630992e-06, "loss": 0.9857, "step": 3203 }, { "epoch": 0.24699352451433856, "grad_norm": 3.6850240230560303, "learning_rate": 8.815738823981228e-06, "loss": 0.9235, "step": 3204 }, { "epoch": 0.24707061362935553, "grad_norm": 3.994870185852051, "learning_rate": 8.814931928439914e-06, "loss": 1.0827, "step": 3205 }, { "epoch": 0.2471477027443725, "grad_norm": 3.5296335220336914, "learning_rate": 8.81412479505735e-06, "loss": 1.06, "step": 3206 }, { "epoch": 0.24722479185938945, "grad_norm": 3.7254252433776855, "learning_rate": 8.81331742388386e-06, "loss": 1.0456, "step": 3207 }, { "epoch": 0.24730188097440642, "grad_norm": 4.138040542602539, "learning_rate": 8.81250981496978e-06, "loss": 1.0802, "step": 3208 }, { "epoch": 0.24737897008942336, "grad_norm": 4.0760884284973145, "learning_rate": 8.811701968365455e-06, "loss": 1.0602, "step": 3209 }, { "epoch": 0.24745605920444033, "grad_norm": 3.428788185119629, "learning_rate": 8.810893884121255e-06, "loss": 0.9812, "step": 3210 }, { "epoch": 0.2475331483194573, "grad_norm": 3.9063422679901123, "learning_rate": 8.81008556228756e-06, "loss": 0.9296, "step": 3211 }, { "epoch": 0.24761023743447425, "grad_norm": 3.583909749984741, "learning_rate": 8.809277002914762e-06, "loss": 1.0541, "step": 3212 }, { "epoch": 0.24768732654949122, "grad_norm": 4.865204811096191, "learning_rate": 8.808468206053271e-06, "loss": 1.006, "step": 3213 }, { "epoch": 0.24776441566450816, "grad_norm": 3.553562641143799, "learning_rate": 8.807659171753513e-06, "loss": 0.9913, "step": 3214 }, { "epoch": 0.24784150477952513, "grad_norm": 3.982586145401001, "learning_rate": 8.806849900065925e-06, "loss": 1.0069, "step": 3215 }, { "epoch": 0.2479185938945421, "grad_norm": 4.025597095489502, "learning_rate": 8.806040391040962e-06, "loss": 0.9886, "step": 3216 }, { "epoch": 0.24799568300955904, "grad_norm": 3.7594804763793945, "learning_rate": 8.805230644729093e-06, "loss": 0.9497, "step": 3217 }, { "epoch": 0.248072772124576, "grad_norm": 3.5578064918518066, 
"learning_rate": 8.804420661180801e-06, "loss": 0.9121, "step": 3218 }, { "epoch": 0.24814986123959296, "grad_norm": 3.8382201194763184, "learning_rate": 8.803610440446584e-06, "loss": 1.0003, "step": 3219 }, { "epoch": 0.24822695035460993, "grad_norm": 4.217833995819092, "learning_rate": 8.802799982576956e-06, "loss": 1.1424, "step": 3220 }, { "epoch": 0.2483040394696269, "grad_norm": 3.7423839569091797, "learning_rate": 8.801989287622443e-06, "loss": 1.0629, "step": 3221 }, { "epoch": 0.24838112858464384, "grad_norm": 3.7119791507720947, "learning_rate": 8.801178355633591e-06, "loss": 1.1239, "step": 3222 }, { "epoch": 0.2484582176996608, "grad_norm": 3.545254707336426, "learning_rate": 8.800367186660955e-06, "loss": 1.0791, "step": 3223 }, { "epoch": 0.24853530681467778, "grad_norm": 3.572874069213867, "learning_rate": 8.799555780755108e-06, "loss": 1.0352, "step": 3224 }, { "epoch": 0.24861239592969472, "grad_norm": 4.043218612670898, "learning_rate": 8.798744137966634e-06, "loss": 1.0846, "step": 3225 }, { "epoch": 0.2486894850447117, "grad_norm": 3.8153271675109863, "learning_rate": 8.797932258346138e-06, "loss": 1.0246, "step": 3226 }, { "epoch": 0.24876657415972864, "grad_norm": 3.6751506328582764, "learning_rate": 8.797120141944237e-06, "loss": 0.9848, "step": 3227 }, { "epoch": 0.2488436632747456, "grad_norm": 3.716115713119507, "learning_rate": 8.79630778881156e-06, "loss": 0.899, "step": 3228 }, { "epoch": 0.24892075238976258, "grad_norm": 3.8847246170043945, "learning_rate": 8.795495198998753e-06, "loss": 1.0613, "step": 3229 }, { "epoch": 0.24899784150477952, "grad_norm": 3.672245502471924, "learning_rate": 8.79468237255648e-06, "loss": 1.0189, "step": 3230 }, { "epoch": 0.2490749306197965, "grad_norm": 4.07139253616333, "learning_rate": 8.793869309535411e-06, "loss": 1.1237, "step": 3231 }, { "epoch": 0.24915201973481343, "grad_norm": 3.6188600063323975, "learning_rate": 8.793056009986243e-06, "loss": 0.9832, "step": 3232 }, { "epoch": 0.2492291088498304, "grad_norm": 3.553046703338623, "learning_rate": 8.792242473959676e-06, "loss": 1.0778, "step": 3233 }, { "epoch": 0.24930619796484738, "grad_norm": 3.8086423873901367, "learning_rate": 8.791428701506433e-06, "loss": 1.0775, "step": 3234 }, { "epoch": 0.24938328707986432, "grad_norm": 3.5978622436523438, "learning_rate": 8.790614692677244e-06, "loss": 1.0341, "step": 3235 }, { "epoch": 0.2494603761948813, "grad_norm": 3.933943271636963, "learning_rate": 8.789800447522863e-06, "loss": 1.0398, "step": 3236 }, { "epoch": 0.24953746530989823, "grad_norm": 3.6028807163238525, "learning_rate": 8.78898596609405e-06, "loss": 0.9908, "step": 3237 }, { "epoch": 0.2496145544249152, "grad_norm": 3.7010693550109863, "learning_rate": 8.788171248441588e-06, "loss": 0.9977, "step": 3238 }, { "epoch": 0.24969164353993217, "grad_norm": 3.41683292388916, "learning_rate": 8.787356294616266e-06, "loss": 0.9295, "step": 3239 }, { "epoch": 0.24976873265494912, "grad_norm": 3.393082618713379, "learning_rate": 8.786541104668895e-06, "loss": 0.9624, "step": 3240 }, { "epoch": 0.2498458217699661, "grad_norm": 3.504213333129883, "learning_rate": 8.785725678650298e-06, "loss": 1.0289, "step": 3241 }, { "epoch": 0.24992291088498303, "grad_norm": 3.6844801902770996, "learning_rate": 8.78491001661131e-06, "loss": 0.9146, "step": 3242 }, { "epoch": 0.25, "grad_norm": 3.710338830947876, "learning_rate": 8.784094118602788e-06, "loss": 1.0341, "step": 3243 }, { "epoch": 0.25007708911501697, "grad_norm": 4.066130638122559, "learning_rate": 
8.783277984675593e-06, "loss": 1.0393, "step": 3244 }, { "epoch": 0.25015417823003394, "grad_norm": 3.7111411094665527, "learning_rate": 8.782461614880611e-06, "loss": 1.0909, "step": 3245 }, { "epoch": 0.25023126734505086, "grad_norm": 3.810129404067993, "learning_rate": 8.781645009268738e-06, "loss": 1.0154, "step": 3246 }, { "epoch": 0.2503083564600678, "grad_norm": 3.654636859893799, "learning_rate": 8.780828167890882e-06, "loss": 0.9366, "step": 3247 }, { "epoch": 0.2503854455750848, "grad_norm": 3.8680331707000732, "learning_rate": 8.780011090797974e-06, "loss": 1.0582, "step": 3248 }, { "epoch": 0.25046253469010177, "grad_norm": 3.812549591064453, "learning_rate": 8.779193778040948e-06, "loss": 0.9273, "step": 3249 }, { "epoch": 0.25053962380511874, "grad_norm": 3.7380712032318115, "learning_rate": 8.778376229670766e-06, "loss": 0.9623, "step": 3250 }, { "epoch": 0.25061671292013565, "grad_norm": 4.139057159423828, "learning_rate": 8.777558445738394e-06, "loss": 1.1061, "step": 3251 }, { "epoch": 0.2506938020351526, "grad_norm": 3.806485891342163, "learning_rate": 8.776740426294818e-06, "loss": 1.0968, "step": 3252 }, { "epoch": 0.2507708911501696, "grad_norm": 3.924656867980957, "learning_rate": 8.775922171391035e-06, "loss": 1.0496, "step": 3253 }, { "epoch": 0.25084798026518657, "grad_norm": 3.7743098735809326, "learning_rate": 8.775103681078063e-06, "loss": 1.1391, "step": 3254 }, { "epoch": 0.25092506938020354, "grad_norm": 3.758241653442383, "learning_rate": 8.774284955406925e-06, "loss": 0.9986, "step": 3255 }, { "epoch": 0.25100215849522045, "grad_norm": 3.812593460083008, "learning_rate": 8.77346599442867e-06, "loss": 0.9571, "step": 3256 }, { "epoch": 0.2510792476102374, "grad_norm": 3.9296791553497314, "learning_rate": 8.772646798194353e-06, "loss": 1.0788, "step": 3257 }, { "epoch": 0.2511563367252544, "grad_norm": 3.9175896644592285, "learning_rate": 8.771827366755046e-06, "loss": 1.1493, "step": 3258 }, { "epoch": 0.25123342584027136, "grad_norm": 3.4963676929473877, "learning_rate": 8.771007700161839e-06, "loss": 1.0635, "step": 3259 }, { "epoch": 0.25131051495528833, "grad_norm": 3.776111602783203, "learning_rate": 8.770187798465832e-06, "loss": 1.0474, "step": 3260 }, { "epoch": 0.25138760407030525, "grad_norm": 3.916110038757324, "learning_rate": 8.769367661718143e-06, "loss": 1.0697, "step": 3261 }, { "epoch": 0.2514646931853222, "grad_norm": 3.6865622997283936, "learning_rate": 8.7685472899699e-06, "loss": 0.9469, "step": 3262 }, { "epoch": 0.2515417823003392, "grad_norm": 4.2629594802856445, "learning_rate": 8.767726683272253e-06, "loss": 1.1568, "step": 3263 }, { "epoch": 0.25161887141535616, "grad_norm": 3.736191511154175, "learning_rate": 8.766905841676361e-06, "loss": 1.0092, "step": 3264 }, { "epoch": 0.25169596053037313, "grad_norm": 3.6174426078796387, "learning_rate": 8.766084765233399e-06, "loss": 0.9476, "step": 3265 }, { "epoch": 0.25177304964539005, "grad_norm": 3.8853113651275635, "learning_rate": 8.765263453994556e-06, "loss": 0.9572, "step": 3266 }, { "epoch": 0.251850138760407, "grad_norm": 5.05413293838501, "learning_rate": 8.764441908011038e-06, "loss": 1.0719, "step": 3267 }, { "epoch": 0.251927227875424, "grad_norm": 3.739021062850952, "learning_rate": 8.763620127334063e-06, "loss": 0.9814, "step": 3268 }, { "epoch": 0.25200431699044096, "grad_norm": 3.654526948928833, "learning_rate": 8.762798112014867e-06, "loss": 1.0566, "step": 3269 }, { "epoch": 0.25208140610545793, "grad_norm": 4.029781818389893, "learning_rate": 8.761975862104694e-06, 
"loss": 1.1511, "step": 3270 }, { "epoch": 0.25215849522047484, "grad_norm": 4.01661491394043, "learning_rate": 8.761153377654811e-06, "loss": 1.0468, "step": 3271 }, { "epoch": 0.2522355843354918, "grad_norm": 3.568437337875366, "learning_rate": 8.760330658716497e-06, "loss": 0.9401, "step": 3272 }, { "epoch": 0.2523126734505088, "grad_norm": 3.7211058139801025, "learning_rate": 8.75950770534104e-06, "loss": 1.0863, "step": 3273 }, { "epoch": 0.25238976256552575, "grad_norm": 3.9265799522399902, "learning_rate": 8.758684517579746e-06, "loss": 1.0554, "step": 3274 }, { "epoch": 0.2524668516805427, "grad_norm": 3.979198694229126, "learning_rate": 8.757861095483942e-06, "loss": 1.0998, "step": 3275 }, { "epoch": 0.25254394079555964, "grad_norm": 3.5346436500549316, "learning_rate": 8.75703743910496e-06, "loss": 0.9852, "step": 3276 }, { "epoch": 0.2526210299105766, "grad_norm": 3.676072835922241, "learning_rate": 8.756213548494152e-06, "loss": 0.9912, "step": 3277 }, { "epoch": 0.2526981190255936, "grad_norm": 3.5911636352539062, "learning_rate": 8.755389423702884e-06, "loss": 0.9803, "step": 3278 }, { "epoch": 0.25277520814061055, "grad_norm": 3.92370343208313, "learning_rate": 8.754565064782533e-06, "loss": 0.9514, "step": 3279 }, { "epoch": 0.2528522972556275, "grad_norm": 3.5190653800964355, "learning_rate": 8.753740471784497e-06, "loss": 1.0362, "step": 3280 }, { "epoch": 0.25292938637064444, "grad_norm": 4.347536087036133, "learning_rate": 8.752915644760185e-06, "loss": 1.0841, "step": 3281 }, { "epoch": 0.2530064754856614, "grad_norm": 3.641761064529419, "learning_rate": 8.752090583761017e-06, "loss": 1.0113, "step": 3282 }, { "epoch": 0.2530835646006784, "grad_norm": 3.5918169021606445, "learning_rate": 8.751265288838435e-06, "loss": 0.9651, "step": 3283 }, { "epoch": 0.25316065371569535, "grad_norm": 3.788519859313965, "learning_rate": 8.750439760043892e-06, "loss": 1.0641, "step": 3284 }, { "epoch": 0.2532377428307123, "grad_norm": 3.539388656616211, "learning_rate": 8.749613997428852e-06, "loss": 1.1261, "step": 3285 }, { "epoch": 0.2533148319457293, "grad_norm": 4.3505167961120605, "learning_rate": 8.748788001044799e-06, "loss": 1.1336, "step": 3286 }, { "epoch": 0.2533919210607462, "grad_norm": 3.823928117752075, "learning_rate": 8.74796177094323e-06, "loss": 1.0041, "step": 3287 }, { "epoch": 0.2534690101757632, "grad_norm": 3.804438829421997, "learning_rate": 8.747135307175657e-06, "loss": 1.0816, "step": 3288 }, { "epoch": 0.25354609929078015, "grad_norm": 3.7979674339294434, "learning_rate": 8.746308609793601e-06, "loss": 1.0633, "step": 3289 }, { "epoch": 0.2536231884057971, "grad_norm": 3.7804579734802246, "learning_rate": 8.745481678848609e-06, "loss": 1.09, "step": 3290 }, { "epoch": 0.2537002775208141, "grad_norm": 4.1087470054626465, "learning_rate": 8.744654514392232e-06, "loss": 1.1016, "step": 3291 }, { "epoch": 0.253777366635831, "grad_norm": 3.6745924949645996, "learning_rate": 8.743827116476039e-06, "loss": 1.0273, "step": 3292 }, { "epoch": 0.253854455750848, "grad_norm": 3.827929735183716, "learning_rate": 8.742999485151617e-06, "loss": 1.1064, "step": 3293 }, { "epoch": 0.25393154486586494, "grad_norm": 3.5309934616088867, "learning_rate": 8.742171620470561e-06, "loss": 0.9478, "step": 3294 }, { "epoch": 0.2540086339808819, "grad_norm": 3.4990084171295166, "learning_rate": 8.741343522484486e-06, "loss": 0.9285, "step": 3295 }, { "epoch": 0.2540857230958989, "grad_norm": 3.40860915184021, "learning_rate": 8.74051519124502e-06, "loss": 0.8972, "step": 3296 }, { 
"epoch": 0.2541628122109158, "grad_norm": 3.403424024581909, "learning_rate": 8.739686626803802e-06, "loss": 1.059, "step": 3297 }, { "epoch": 0.25423990132593277, "grad_norm": 4.193357944488525, "learning_rate": 8.738857829212495e-06, "loss": 0.9641, "step": 3298 }, { "epoch": 0.25431699044094974, "grad_norm": 4.222657203674316, "learning_rate": 8.738028798522762e-06, "loss": 1.0695, "step": 3299 }, { "epoch": 0.2543940795559667, "grad_norm": 3.387126922607422, "learning_rate": 8.737199534786297e-06, "loss": 1.1003, "step": 3300 }, { "epoch": 0.2544711686709837, "grad_norm": 3.9024600982666016, "learning_rate": 8.736370038054796e-06, "loss": 1.0785, "step": 3301 }, { "epoch": 0.2545482577860006, "grad_norm": 3.78934383392334, "learning_rate": 8.735540308379974e-06, "loss": 1.0066, "step": 3302 }, { "epoch": 0.25462534690101757, "grad_norm": 3.661975383758545, "learning_rate": 8.73471034581356e-06, "loss": 1.0309, "step": 3303 }, { "epoch": 0.25470243601603454, "grad_norm": 3.687753915786743, "learning_rate": 8.733880150407296e-06, "loss": 1.0173, "step": 3304 }, { "epoch": 0.2547795251310515, "grad_norm": 3.935116767883301, "learning_rate": 8.733049722212946e-06, "loss": 1.1019, "step": 3305 }, { "epoch": 0.2548566142460685, "grad_norm": 3.9937877655029297, "learning_rate": 8.73221906128228e-06, "loss": 1.1622, "step": 3306 }, { "epoch": 0.2549337033610854, "grad_norm": 3.838475227355957, "learning_rate": 8.731388167667083e-06, "loss": 0.977, "step": 3307 }, { "epoch": 0.25501079247610237, "grad_norm": 3.6274874210357666, "learning_rate": 8.73055704141916e-06, "loss": 1.098, "step": 3308 }, { "epoch": 0.25508788159111934, "grad_norm": 3.570650577545166, "learning_rate": 8.729725682590329e-06, "loss": 0.9655, "step": 3309 }, { "epoch": 0.2551649707061363, "grad_norm": 3.7613351345062256, "learning_rate": 8.728894091232417e-06, "loss": 0.9714, "step": 3310 }, { "epoch": 0.2552420598211533, "grad_norm": 3.4735591411590576, "learning_rate": 8.728062267397268e-06, "loss": 0.8959, "step": 3311 }, { "epoch": 0.2553191489361702, "grad_norm": 3.8954594135284424, "learning_rate": 8.727230211136747e-06, "loss": 1.0228, "step": 3312 }, { "epoch": 0.25539623805118716, "grad_norm": 4.0239949226379395, "learning_rate": 8.726397922502727e-06, "loss": 1.0255, "step": 3313 }, { "epoch": 0.25547332716620413, "grad_norm": 3.6205320358276367, "learning_rate": 8.725565401547096e-06, "loss": 0.8827, "step": 3314 }, { "epoch": 0.2555504162812211, "grad_norm": 3.7505264282226562, "learning_rate": 8.724732648321756e-06, "loss": 1.0148, "step": 3315 }, { "epoch": 0.2556275053962381, "grad_norm": 3.5387229919433594, "learning_rate": 8.723899662878627e-06, "loss": 0.9492, "step": 3316 }, { "epoch": 0.255704594511255, "grad_norm": 3.82112193107605, "learning_rate": 8.72306644526964e-06, "loss": 0.9834, "step": 3317 }, { "epoch": 0.25578168362627196, "grad_norm": 3.7389678955078125, "learning_rate": 8.722232995546742e-06, "loss": 0.9932, "step": 3318 }, { "epoch": 0.25585877274128893, "grad_norm": 3.6923506259918213, "learning_rate": 8.721399313761896e-06, "loss": 0.9353, "step": 3319 }, { "epoch": 0.2559358618563059, "grad_norm": 3.9490480422973633, "learning_rate": 8.720565399967076e-06, "loss": 1.0178, "step": 3320 }, { "epoch": 0.2560129509713229, "grad_norm": 3.4159951210021973, "learning_rate": 8.719731254214271e-06, "loss": 0.967, "step": 3321 }, { "epoch": 0.2560900400863398, "grad_norm": 3.7180635929107666, "learning_rate": 8.71889687655549e-06, "loss": 0.902, "step": 3322 }, { "epoch": 0.25616712920135676, 
"grad_norm": 4.072498321533203, "learning_rate": 8.718062267042749e-06, "loss": 1.0151, "step": 3323 }, { "epoch": 0.25624421831637373, "grad_norm": 4.77862548828125, "learning_rate": 8.71722742572808e-06, "loss": 1.0888, "step": 3324 }, { "epoch": 0.2563213074313907, "grad_norm": 3.858112096786499, "learning_rate": 8.716392352663535e-06, "loss": 1.015, "step": 3325 }, { "epoch": 0.25639839654640767, "grad_norm": 3.6426641941070557, "learning_rate": 8.715557047901174e-06, "loss": 1.0053, "step": 3326 }, { "epoch": 0.2564754856614246, "grad_norm": 3.9015743732452393, "learning_rate": 8.714721511493074e-06, "loss": 1.2689, "step": 3327 }, { "epoch": 0.25655257477644156, "grad_norm": 4.314653396606445, "learning_rate": 8.713885743491327e-06, "loss": 1.0598, "step": 3328 }, { "epoch": 0.2566296638914585, "grad_norm": 4.168483257293701, "learning_rate": 8.713049743948038e-06, "loss": 1.1855, "step": 3329 }, { "epoch": 0.2567067530064755, "grad_norm": 4.1147356033325195, "learning_rate": 8.71221351291533e-06, "loss": 1.0772, "step": 3330 }, { "epoch": 0.25678384212149247, "grad_norm": 4.284569263458252, "learning_rate": 8.711377050445333e-06, "loss": 0.9965, "step": 3331 }, { "epoch": 0.2568609312365094, "grad_norm": 4.178953647613525, "learning_rate": 8.710540356590198e-06, "loss": 1.056, "step": 3332 }, { "epoch": 0.25693802035152635, "grad_norm": 3.9649317264556885, "learning_rate": 8.709703431402092e-06, "loss": 0.9488, "step": 3333 }, { "epoch": 0.2570151094665433, "grad_norm": 3.9904897212982178, "learning_rate": 8.70886627493319e-06, "loss": 1.033, "step": 3334 }, { "epoch": 0.2570921985815603, "grad_norm": 3.681057929992676, "learning_rate": 8.708028887235682e-06, "loss": 0.9747, "step": 3335 }, { "epoch": 0.25716928769657726, "grad_norm": 3.687772750854492, "learning_rate": 8.707191268361778e-06, "loss": 0.9872, "step": 3336 }, { "epoch": 0.2572463768115942, "grad_norm": 3.913752794265747, "learning_rate": 8.7063534183637e-06, "loss": 1.085, "step": 3337 }, { "epoch": 0.25732346592661115, "grad_norm": 4.11881685256958, "learning_rate": 8.705515337293682e-06, "loss": 1.0011, "step": 3338 }, { "epoch": 0.2574005550416281, "grad_norm": 3.4855170249938965, "learning_rate": 8.704677025203972e-06, "loss": 0.9699, "step": 3339 }, { "epoch": 0.2574776441566451, "grad_norm": 3.873643636703491, "learning_rate": 8.703838482146837e-06, "loss": 1.0217, "step": 3340 }, { "epoch": 0.25755473327166206, "grad_norm": 3.451205015182495, "learning_rate": 8.702999708174557e-06, "loss": 0.9768, "step": 3341 }, { "epoch": 0.257631822386679, "grad_norm": 3.673360586166382, "learning_rate": 8.702160703339422e-06, "loss": 1.0151, "step": 3342 }, { "epoch": 0.25770891150169595, "grad_norm": 3.864973306655884, "learning_rate": 8.701321467693741e-06, "loss": 1.0275, "step": 3343 }, { "epoch": 0.2577860006167129, "grad_norm": 3.8561880588531494, "learning_rate": 8.700482001289838e-06, "loss": 1.0697, "step": 3344 }, { "epoch": 0.2578630897317299, "grad_norm": 4.72223424911499, "learning_rate": 8.699642304180045e-06, "loss": 1.1577, "step": 3345 }, { "epoch": 0.25794017884674686, "grad_norm": 3.793133020401001, "learning_rate": 8.698802376416718e-06, "loss": 0.9992, "step": 3346 }, { "epoch": 0.2580172679617638, "grad_norm": 3.360287666320801, "learning_rate": 8.697962218052217e-06, "loss": 0.9239, "step": 3347 }, { "epoch": 0.25809435707678074, "grad_norm": 3.907996416091919, "learning_rate": 8.697121829138925e-06, "loss": 0.9704, "step": 3348 }, { "epoch": 0.2581714461917977, "grad_norm": 3.603130578994751, 
"learning_rate": 8.696281209729234e-06, "loss": 0.9822, "step": 3349 }, { "epoch": 0.2582485353068147, "grad_norm": 3.7168142795562744, "learning_rate": 8.695440359875554e-06, "loss": 1.0306, "step": 3350 }, { "epoch": 0.25832562442183166, "grad_norm": 3.7132720947265625, "learning_rate": 8.694599279630306e-06, "loss": 1.0643, "step": 3351 }, { "epoch": 0.25840271353684857, "grad_norm": 3.9718921184539795, "learning_rate": 8.693757969045928e-06, "loss": 0.9594, "step": 3352 }, { "epoch": 0.25847980265186554, "grad_norm": 3.7555110454559326, "learning_rate": 8.692916428174872e-06, "loss": 1.0797, "step": 3353 }, { "epoch": 0.2585568917668825, "grad_norm": 3.6487925052642822, "learning_rate": 8.692074657069602e-06, "loss": 0.9086, "step": 3354 }, { "epoch": 0.2586339808818995, "grad_norm": 3.890230178833008, "learning_rate": 8.6912326557826e-06, "loss": 1.0957, "step": 3355 }, { "epoch": 0.25871106999691645, "grad_norm": 3.4611878395080566, "learning_rate": 8.690390424366358e-06, "loss": 0.9665, "step": 3356 }, { "epoch": 0.25878815911193337, "grad_norm": 3.949563980102539, "learning_rate": 8.689547962873386e-06, "loss": 0.9853, "step": 3357 }, { "epoch": 0.25886524822695034, "grad_norm": 3.7639212608337402, "learning_rate": 8.688705271356208e-06, "loss": 1.0879, "step": 3358 }, { "epoch": 0.2589423373419673, "grad_norm": 3.696392297744751, "learning_rate": 8.68786234986736e-06, "loss": 0.9405, "step": 3359 }, { "epoch": 0.2590194264569843, "grad_norm": 4.061792850494385, "learning_rate": 8.687019198459395e-06, "loss": 1.0346, "step": 3360 }, { "epoch": 0.25909651557200125, "grad_norm": 3.8702781200408936, "learning_rate": 8.686175817184878e-06, "loss": 1.0629, "step": 3361 }, { "epoch": 0.25917360468701817, "grad_norm": 3.8583784103393555, "learning_rate": 8.685332206096391e-06, "loss": 1.0525, "step": 3362 }, { "epoch": 0.25925069380203514, "grad_norm": 3.554065465927124, "learning_rate": 8.684488365246526e-06, "loss": 0.9169, "step": 3363 }, { "epoch": 0.2593277829170521, "grad_norm": 3.972473382949829, "learning_rate": 8.683644294687893e-06, "loss": 1.012, "step": 3364 }, { "epoch": 0.2594048720320691, "grad_norm": 4.070230484008789, "learning_rate": 8.68279999447312e-06, "loss": 0.9605, "step": 3365 }, { "epoch": 0.25948196114708605, "grad_norm": 3.5376861095428467, "learning_rate": 8.681955464654839e-06, "loss": 0.8759, "step": 3366 }, { "epoch": 0.25955905026210296, "grad_norm": 3.8391125202178955, "learning_rate": 8.681110705285705e-06, "loss": 0.989, "step": 3367 }, { "epoch": 0.25963613937711993, "grad_norm": 3.567213296890259, "learning_rate": 8.680265716418381e-06, "loss": 1.0166, "step": 3368 }, { "epoch": 0.2597132284921369, "grad_norm": 3.484469413757324, "learning_rate": 8.679420498105553e-06, "loss": 0.9996, "step": 3369 }, { "epoch": 0.2597903176071539, "grad_norm": 3.5866034030914307, "learning_rate": 8.678575050399912e-06, "loss": 0.9088, "step": 3370 }, { "epoch": 0.25986740672217085, "grad_norm": 3.686366319656372, "learning_rate": 8.677729373354169e-06, "loss": 1.0459, "step": 3371 }, { "epoch": 0.2599444958371878, "grad_norm": 3.622049570083618, "learning_rate": 8.676883467021046e-06, "loss": 1.1149, "step": 3372 }, { "epoch": 0.26002158495220473, "grad_norm": 3.5664947032928467, "learning_rate": 8.676037331453283e-06, "loss": 1.0691, "step": 3373 }, { "epoch": 0.2600986740672217, "grad_norm": 3.5930142402648926, "learning_rate": 8.675190966703631e-06, "loss": 1.0553, "step": 3374 }, { "epoch": 0.2601757631822387, "grad_norm": 3.7880308628082275, "learning_rate": 
8.674344372824855e-06, "loss": 1.024, "step": 3375 }, { "epoch": 0.26025285229725564, "grad_norm": 4.235076427459717, "learning_rate": 8.673497549869738e-06, "loss": 1.0325, "step": 3376 }, { "epoch": 0.2603299414122726, "grad_norm": 3.9409871101379395, "learning_rate": 8.672650497891075e-06, "loss": 1.0099, "step": 3377 }, { "epoch": 0.26040703052728953, "grad_norm": 3.7281947135925293, "learning_rate": 8.671803216941674e-06, "loss": 1.0297, "step": 3378 }, { "epoch": 0.2604841196423065, "grad_norm": 4.101794719696045, "learning_rate": 8.67095570707436e-06, "loss": 1.0915, "step": 3379 }, { "epoch": 0.26056120875732347, "grad_norm": 3.782160997390747, "learning_rate": 8.67010796834197e-06, "loss": 1.0381, "step": 3380 }, { "epoch": 0.26063829787234044, "grad_norm": 3.6193161010742188, "learning_rate": 8.669260000797355e-06, "loss": 0.9774, "step": 3381 }, { "epoch": 0.2607153869873574, "grad_norm": 3.8134310245513916, "learning_rate": 8.668411804493384e-06, "loss": 1.0437, "step": 3382 }, { "epoch": 0.2607924761023743, "grad_norm": 3.7592546939849854, "learning_rate": 8.667563379482934e-06, "loss": 0.922, "step": 3383 }, { "epoch": 0.2608695652173913, "grad_norm": 3.698714256286621, "learning_rate": 8.666714725818903e-06, "loss": 0.9594, "step": 3384 }, { "epoch": 0.26094665433240827, "grad_norm": 3.6574625968933105, "learning_rate": 8.6658658435542e-06, "loss": 1.0836, "step": 3385 }, { "epoch": 0.26102374344742524, "grad_norm": 3.509253740310669, "learning_rate": 8.665016732741748e-06, "loss": 1.0096, "step": 3386 }, { "epoch": 0.2611008325624422, "grad_norm": 3.7333405017852783, "learning_rate": 8.664167393434484e-06, "loss": 1.1459, "step": 3387 }, { "epoch": 0.2611779216774591, "grad_norm": 3.782824993133545, "learning_rate": 8.66331782568536e-06, "loss": 1.0318, "step": 3388 }, { "epoch": 0.2612550107924761, "grad_norm": 3.6097073554992676, "learning_rate": 8.662468029547341e-06, "loss": 0.8997, "step": 3389 }, { "epoch": 0.26133209990749307, "grad_norm": 3.526141405105591, "learning_rate": 8.661618005073412e-06, "loss": 0.9869, "step": 3390 }, { "epoch": 0.26140918902251004, "grad_norm": 3.5635085105895996, "learning_rate": 8.66076775231656e-06, "loss": 0.9146, "step": 3391 }, { "epoch": 0.261486278137527, "grad_norm": 3.9402894973754883, "learning_rate": 8.659917271329801e-06, "loss": 1.1514, "step": 3392 }, { "epoch": 0.2615633672525439, "grad_norm": 4.007856845855713, "learning_rate": 8.659066562166157e-06, "loss": 1.0842, "step": 3393 }, { "epoch": 0.2616404563675609, "grad_norm": 3.9499897956848145, "learning_rate": 8.65821562487866e-06, "loss": 1.0736, "step": 3394 }, { "epoch": 0.26171754548257786, "grad_norm": 4.446572303771973, "learning_rate": 8.657364459520367e-06, "loss": 1.0391, "step": 3395 }, { "epoch": 0.26179463459759483, "grad_norm": 3.946072816848755, "learning_rate": 8.656513066144342e-06, "loss": 1.1018, "step": 3396 }, { "epoch": 0.2618717237126118, "grad_norm": 3.971186637878418, "learning_rate": 8.655661444803664e-06, "loss": 1.0861, "step": 3397 }, { "epoch": 0.2619488128276287, "grad_norm": 3.788757801055908, "learning_rate": 8.654809595551429e-06, "loss": 1.1189, "step": 3398 }, { "epoch": 0.2620259019426457, "grad_norm": 3.536916732788086, "learning_rate": 8.653957518440743e-06, "loss": 0.9544, "step": 3399 }, { "epoch": 0.26210299105766266, "grad_norm": 3.843003749847412, "learning_rate": 8.653105213524733e-06, "loss": 0.9992, "step": 3400 }, { "epoch": 0.26218008017267963, "grad_norm": 3.6438448429107666, "learning_rate": 8.65225268085653e-06, "loss": 
1.0176, "step": 3401 }, { "epoch": 0.2622571692876966, "grad_norm": 4.137069225311279, "learning_rate": 8.65139992048929e-06, "loss": 1.0994, "step": 3402 }, { "epoch": 0.2623342584027135, "grad_norm": 3.9459714889526367, "learning_rate": 8.650546932476173e-06, "loss": 1.0303, "step": 3403 }, { "epoch": 0.2624113475177305, "grad_norm": 3.64534068107605, "learning_rate": 8.649693716870364e-06, "loss": 0.9538, "step": 3404 }, { "epoch": 0.26248843663274746, "grad_norm": 3.5429468154907227, "learning_rate": 8.648840273725055e-06, "loss": 0.9813, "step": 3405 }, { "epoch": 0.26256552574776443, "grad_norm": 3.9356422424316406, "learning_rate": 8.64798660309345e-06, "loss": 1.0192, "step": 3406 }, { "epoch": 0.2626426148627814, "grad_norm": 3.7008917331695557, "learning_rate": 8.647132705028776e-06, "loss": 1.0213, "step": 3407 }, { "epoch": 0.2627197039777983, "grad_norm": 3.5144479274749756, "learning_rate": 8.646278579584265e-06, "loss": 0.9657, "step": 3408 }, { "epoch": 0.2627967930928153, "grad_norm": 3.8584909439086914, "learning_rate": 8.64542422681317e-06, "loss": 1.1071, "step": 3409 }, { "epoch": 0.26287388220783225, "grad_norm": 3.93780779838562, "learning_rate": 8.644569646768755e-06, "loss": 1.0209, "step": 3410 }, { "epoch": 0.2629509713228492, "grad_norm": 3.6556358337402344, "learning_rate": 8.643714839504296e-06, "loss": 1.0174, "step": 3411 }, { "epoch": 0.2630280604378662, "grad_norm": 4.030715465545654, "learning_rate": 8.64285980507309e-06, "loss": 1.0071, "step": 3412 }, { "epoch": 0.2631051495528831, "grad_norm": 3.5671634674072266, "learning_rate": 8.642004543528442e-06, "loss": 1.0613, "step": 3413 }, { "epoch": 0.2631822386679001, "grad_norm": 3.487668991088867, "learning_rate": 8.641149054923673e-06, "loss": 0.9665, "step": 3414 }, { "epoch": 0.26325932778291705, "grad_norm": 3.8126344680786133, "learning_rate": 8.640293339312119e-06, "loss": 0.9724, "step": 3415 }, { "epoch": 0.263336416897934, "grad_norm": 3.6022071838378906, "learning_rate": 8.639437396747127e-06, "loss": 1.0247, "step": 3416 }, { "epoch": 0.263413506012951, "grad_norm": 3.6988253593444824, "learning_rate": 8.638581227282064e-06, "loss": 1.0596, "step": 3417 }, { "epoch": 0.2634905951279679, "grad_norm": 3.569603204727173, "learning_rate": 8.637724830970307e-06, "loss": 1.0096, "step": 3418 }, { "epoch": 0.2635676842429849, "grad_norm": 3.479275941848755, "learning_rate": 8.636868207865244e-06, "loss": 1.0446, "step": 3419 }, { "epoch": 0.26364477335800185, "grad_norm": 3.994907855987549, "learning_rate": 8.636011358020286e-06, "loss": 0.9947, "step": 3420 }, { "epoch": 0.2637218624730188, "grad_norm": 3.892772912979126, "learning_rate": 8.635154281488851e-06, "loss": 1.1448, "step": 3421 }, { "epoch": 0.2637989515880358, "grad_norm": 3.7717888355255127, "learning_rate": 8.634296978324374e-06, "loss": 1.0227, "step": 3422 }, { "epoch": 0.2638760407030527, "grad_norm": 3.6846694946289062, "learning_rate": 8.633439448580302e-06, "loss": 0.9988, "step": 3423 }, { "epoch": 0.2639531298180697, "grad_norm": 3.5329177379608154, "learning_rate": 8.6325816923101e-06, "loss": 0.9628, "step": 3424 }, { "epoch": 0.26403021893308665, "grad_norm": 3.6999776363372803, "learning_rate": 8.631723709567242e-06, "loss": 1.0493, "step": 3425 }, { "epoch": 0.2641073080481036, "grad_norm": 3.73701810836792, "learning_rate": 8.630865500405218e-06, "loss": 1.0119, "step": 3426 }, { "epoch": 0.2641843971631206, "grad_norm": 3.6966707706451416, "learning_rate": 8.630007064877538e-06, "loss": 1.0202, "step": 3427 }, { 
"epoch": 0.2642614862781375, "grad_norm": 3.5695645809173584, "learning_rate": 8.629148403037715e-06, "loss": 0.9439, "step": 3428 }, { "epoch": 0.2643385753931545, "grad_norm": 3.6183085441589355, "learning_rate": 8.628289514939287e-06, "loss": 1.0232, "step": 3429 }, { "epoch": 0.26441566450817144, "grad_norm": 3.7045369148254395, "learning_rate": 8.6274304006358e-06, "loss": 0.932, "step": 3430 }, { "epoch": 0.2644927536231884, "grad_norm": 3.988922119140625, "learning_rate": 8.626571060180812e-06, "loss": 1.024, "step": 3431 }, { "epoch": 0.2645698427382054, "grad_norm": 3.669790029525757, "learning_rate": 8.625711493627902e-06, "loss": 1.0481, "step": 3432 }, { "epoch": 0.2646469318532223, "grad_norm": 3.9124808311462402, "learning_rate": 8.62485170103066e-06, "loss": 1.0879, "step": 3433 }, { "epoch": 0.26472402096823927, "grad_norm": 3.7043819427490234, "learning_rate": 8.623991682442685e-06, "loss": 1.0116, "step": 3434 }, { "epoch": 0.26480111008325624, "grad_norm": 3.8101680278778076, "learning_rate": 8.623131437917598e-06, "loss": 1.0168, "step": 3435 }, { "epoch": 0.2648781991982732, "grad_norm": 3.652815103530884, "learning_rate": 8.622270967509032e-06, "loss": 1.1259, "step": 3436 }, { "epoch": 0.2649552883132902, "grad_norm": 3.873318910598755, "learning_rate": 8.621410271270632e-06, "loss": 1.1589, "step": 3437 }, { "epoch": 0.2650323774283071, "grad_norm": 3.489452362060547, "learning_rate": 8.620549349256056e-06, "loss": 0.9174, "step": 3438 }, { "epoch": 0.26510946654332407, "grad_norm": 3.7744300365448, "learning_rate": 8.61968820151898e-06, "loss": 1.0291, "step": 3439 }, { "epoch": 0.26518655565834104, "grad_norm": 3.8608028888702393, "learning_rate": 8.61882682811309e-06, "loss": 1.0441, "step": 3440 }, { "epoch": 0.265263644773358, "grad_norm": 3.826913595199585, "learning_rate": 8.61796522909209e-06, "loss": 1.058, "step": 3441 }, { "epoch": 0.265340733888375, "grad_norm": 4.063562393188477, "learning_rate": 8.617103404509699e-06, "loss": 0.9656, "step": 3442 }, { "epoch": 0.2654178230033919, "grad_norm": 4.010993957519531, "learning_rate": 8.616241354419642e-06, "loss": 1.1192, "step": 3443 }, { "epoch": 0.26549491211840887, "grad_norm": 4.178236961364746, "learning_rate": 8.615379078875664e-06, "loss": 0.9984, "step": 3444 }, { "epoch": 0.26557200123342584, "grad_norm": 4.172964572906494, "learning_rate": 8.614516577931526e-06, "loss": 1.0439, "step": 3445 }, { "epoch": 0.2656490903484428, "grad_norm": 3.271082878112793, "learning_rate": 8.613653851641001e-06, "loss": 0.9694, "step": 3446 }, { "epoch": 0.2657261794634598, "grad_norm": 3.594104766845703, "learning_rate": 8.612790900057873e-06, "loss": 1.0078, "step": 3447 }, { "epoch": 0.2658032685784767, "grad_norm": 3.8845951557159424, "learning_rate": 8.611927723235943e-06, "loss": 1.1074, "step": 3448 }, { "epoch": 0.26588035769349366, "grad_norm": 3.545318365097046, "learning_rate": 8.611064321229027e-06, "loss": 1.0609, "step": 3449 }, { "epoch": 0.26595744680851063, "grad_norm": 3.784323215484619, "learning_rate": 8.610200694090951e-06, "loss": 0.9997, "step": 3450 }, { "epoch": 0.2660345359235276, "grad_norm": 3.6356170177459717, "learning_rate": 8.609336841875561e-06, "loss": 0.9148, "step": 3451 }, { "epoch": 0.2661116250385446, "grad_norm": 4.235599040985107, "learning_rate": 8.608472764636714e-06, "loss": 1.1123, "step": 3452 }, { "epoch": 0.2661887141535615, "grad_norm": 3.789675712585449, "learning_rate": 8.607608462428273e-06, "loss": 1.0678, "step": 3453 }, { "epoch": 0.26626580326857846, 
"grad_norm": 3.547661542892456, "learning_rate": 8.606743935304134e-06, "loss": 1.0874, "step": 3454 }, { "epoch": 0.26634289238359543, "grad_norm": 3.812304735183716, "learning_rate": 8.605879183318188e-06, "loss": 0.9825, "step": 3455 }, { "epoch": 0.2664199814986124, "grad_norm": 3.6646666526794434, "learning_rate": 8.605014206524352e-06, "loss": 1.097, "step": 3456 }, { "epoch": 0.26649707061362937, "grad_norm": 3.943058490753174, "learning_rate": 8.60414900497655e-06, "loss": 0.9482, "step": 3457 }, { "epoch": 0.26657415972864634, "grad_norm": 3.732710838317871, "learning_rate": 8.603283578728723e-06, "loss": 1.0008, "step": 3458 }, { "epoch": 0.26665124884366326, "grad_norm": 3.5974366664886475, "learning_rate": 8.60241792783483e-06, "loss": 1.0185, "step": 3459 }, { "epoch": 0.26672833795868023, "grad_norm": 3.4493045806884766, "learning_rate": 8.601552052348833e-06, "loss": 1.0396, "step": 3460 }, { "epoch": 0.2668054270736972, "grad_norm": 3.9309725761413574, "learning_rate": 8.60068595232472e-06, "loss": 0.9397, "step": 3461 }, { "epoch": 0.26688251618871417, "grad_norm": 3.614811420440674, "learning_rate": 8.599819627816486e-06, "loss": 1.0507, "step": 3462 }, { "epoch": 0.26695960530373114, "grad_norm": 3.847226619720459, "learning_rate": 8.598953078878142e-06, "loss": 1.0411, "step": 3463 }, { "epoch": 0.26703669441874806, "grad_norm": 4.487602233886719, "learning_rate": 8.598086305563714e-06, "loss": 1.1693, "step": 3464 }, { "epoch": 0.267113783533765, "grad_norm": 3.617213487625122, "learning_rate": 8.597219307927239e-06, "loss": 1.0972, "step": 3465 }, { "epoch": 0.267190872648782, "grad_norm": 4.097979545593262, "learning_rate": 8.59635208602277e-06, "loss": 1.0642, "step": 3466 }, { "epoch": 0.26726796176379897, "grad_norm": 3.6943976879119873, "learning_rate": 8.595484639904375e-06, "loss": 1.0495, "step": 3467 }, { "epoch": 0.26734505087881594, "grad_norm": 4.322422981262207, "learning_rate": 8.594616969626134e-06, "loss": 1.0781, "step": 3468 }, { "epoch": 0.26742213999383285, "grad_norm": 3.314831495285034, "learning_rate": 8.593749075242143e-06, "loss": 0.8829, "step": 3469 }, { "epoch": 0.2674992291088498, "grad_norm": 3.7989742755889893, "learning_rate": 8.592880956806509e-06, "loss": 0.9417, "step": 3470 }, { "epoch": 0.2675763182238668, "grad_norm": 3.5251259803771973, "learning_rate": 8.592012614373355e-06, "loss": 0.9525, "step": 3471 }, { "epoch": 0.26765340733888376, "grad_norm": 3.975076198577881, "learning_rate": 8.591144047996817e-06, "loss": 1.1792, "step": 3472 }, { "epoch": 0.26773049645390073, "grad_norm": 3.9003539085388184, "learning_rate": 8.59027525773105e-06, "loss": 1.0923, "step": 3473 }, { "epoch": 0.26780758556891765, "grad_norm": 3.6139657497406006, "learning_rate": 8.589406243630212e-06, "loss": 0.9957, "step": 3474 }, { "epoch": 0.2678846746839346, "grad_norm": 4.016913414001465, "learning_rate": 8.588537005748484e-06, "loss": 1.1282, "step": 3475 }, { "epoch": 0.2679617637989516, "grad_norm": 3.5826406478881836, "learning_rate": 8.587667544140063e-06, "loss": 1.0285, "step": 3476 }, { "epoch": 0.26803885291396856, "grad_norm": 3.5834274291992188, "learning_rate": 8.586797858859149e-06, "loss": 0.941, "step": 3477 }, { "epoch": 0.26811594202898553, "grad_norm": 3.8702480792999268, "learning_rate": 8.585927949959965e-06, "loss": 1.0519, "step": 3478 }, { "epoch": 0.26819303114400245, "grad_norm": 3.2615435123443604, "learning_rate": 8.585057817496747e-06, "loss": 0.9371, "step": 3479 }, { "epoch": 0.2682701202590194, "grad_norm": 
3.683424949645996, "learning_rate": 8.584187461523741e-06, "loss": 0.9695, "step": 3480 }, { "epoch": 0.2683472093740364, "grad_norm": 3.533581018447876, "learning_rate": 8.583316882095209e-06, "loss": 1.0256, "step": 3481 }, { "epoch": 0.26842429848905336, "grad_norm": 3.866887331008911, "learning_rate": 8.582446079265428e-06, "loss": 1.115, "step": 3482 }, { "epoch": 0.26850138760407033, "grad_norm": 3.444634437561035, "learning_rate": 8.581575053088687e-06, "loss": 0.9583, "step": 3483 }, { "epoch": 0.26857847671908724, "grad_norm": 3.9610986709594727, "learning_rate": 8.580703803619292e-06, "loss": 0.9974, "step": 3484 }, { "epoch": 0.2686555658341042, "grad_norm": 3.6639208793640137, "learning_rate": 8.57983233091156e-06, "loss": 1.0442, "step": 3485 }, { "epoch": 0.2687326549491212, "grad_norm": 3.690537929534912, "learning_rate": 8.578960635019822e-06, "loss": 1.0666, "step": 3486 }, { "epoch": 0.26880974406413816, "grad_norm": 3.4930202960968018, "learning_rate": 8.578088715998425e-06, "loss": 0.9999, "step": 3487 }, { "epoch": 0.2688868331791551, "grad_norm": 3.7361273765563965, "learning_rate": 8.577216573901727e-06, "loss": 0.9615, "step": 3488 }, { "epoch": 0.26896392229417204, "grad_norm": 3.7041215896606445, "learning_rate": 8.576344208784104e-06, "loss": 1.1125, "step": 3489 }, { "epoch": 0.269041011409189, "grad_norm": 3.4133498668670654, "learning_rate": 8.575471620699942e-06, "loss": 0.9674, "step": 3490 }, { "epoch": 0.269118100524206, "grad_norm": 3.8262181282043457, "learning_rate": 8.57459880970364e-06, "loss": 1.1, "step": 3491 }, { "epoch": 0.26919518963922295, "grad_norm": 3.9435431957244873, "learning_rate": 8.573725775849617e-06, "loss": 1.1601, "step": 3492 }, { "epoch": 0.2692722787542399, "grad_norm": 4.27089786529541, "learning_rate": 8.5728525191923e-06, "loss": 1.085, "step": 3493 }, { "epoch": 0.26934936786925684, "grad_norm": 3.4078094959259033, "learning_rate": 8.571979039786135e-06, "loss": 0.9794, "step": 3494 }, { "epoch": 0.2694264569842738, "grad_norm": 3.4169962406158447, "learning_rate": 8.571105337685575e-06, "loss": 0.9074, "step": 3495 }, { "epoch": 0.2695035460992908, "grad_norm": 3.312422752380371, "learning_rate": 8.570231412945092e-06, "loss": 0.9965, "step": 3496 }, { "epoch": 0.26958063521430775, "grad_norm": 4.496835231781006, "learning_rate": 8.569357265619172e-06, "loss": 1.0674, "step": 3497 }, { "epoch": 0.2696577243293247, "grad_norm": 3.7908670902252197, "learning_rate": 8.56848289576231e-06, "loss": 0.9237, "step": 3498 }, { "epoch": 0.26973481344434164, "grad_norm": 3.9191906452178955, "learning_rate": 8.567608303429024e-06, "loss": 0.9648, "step": 3499 }, { "epoch": 0.2698119025593586, "grad_norm": 3.607534408569336, "learning_rate": 8.566733488673837e-06, "loss": 1.0898, "step": 3500 }, { "epoch": 0.2698889916743756, "grad_norm": 4.031132698059082, "learning_rate": 8.56585845155129e-06, "loss": 0.9965, "step": 3501 }, { "epoch": 0.26996608078939255, "grad_norm": 3.5480751991271973, "learning_rate": 8.564983192115934e-06, "loss": 0.9761, "step": 3502 }, { "epoch": 0.2700431699044095, "grad_norm": 4.214536666870117, "learning_rate": 8.564107710422343e-06, "loss": 1.0331, "step": 3503 }, { "epoch": 0.27012025901942643, "grad_norm": 3.7173125743865967, "learning_rate": 8.563232006525093e-06, "loss": 1.0236, "step": 3504 }, { "epoch": 0.2701973481344434, "grad_norm": 4.071146011352539, "learning_rate": 8.562356080478781e-06, "loss": 1.0041, "step": 3505 }, { "epoch": 0.2702744372494604, "grad_norm": 3.6947145462036133, 
"learning_rate": 8.56147993233802e-06, "loss": 1.0646, "step": 3506 }, { "epoch": 0.27035152636447735, "grad_norm": 3.899477243423462, "learning_rate": 8.560603562157428e-06, "loss": 0.9962, "step": 3507 }, { "epoch": 0.2704286154794943, "grad_norm": 3.6149401664733887, "learning_rate": 8.559726969991646e-06, "loss": 0.9407, "step": 3508 }, { "epoch": 0.27050570459451123, "grad_norm": 3.5481820106506348, "learning_rate": 8.558850155895325e-06, "loss": 0.9833, "step": 3509 }, { "epoch": 0.2705827937095282, "grad_norm": 3.8647804260253906, "learning_rate": 8.557973119923126e-06, "loss": 0.9107, "step": 3510 }, { "epoch": 0.2706598828245452, "grad_norm": 4.967718601226807, "learning_rate": 8.557095862129732e-06, "loss": 1.1314, "step": 3511 }, { "epoch": 0.27073697193956214, "grad_norm": 3.775641918182373, "learning_rate": 8.556218382569832e-06, "loss": 0.9328, "step": 3512 }, { "epoch": 0.2708140610545791, "grad_norm": 3.7387449741363525, "learning_rate": 8.555340681298136e-06, "loss": 1.074, "step": 3513 }, { "epoch": 0.27089115016959603, "grad_norm": 3.9792206287384033, "learning_rate": 8.554462758369362e-06, "loss": 1.0373, "step": 3514 }, { "epoch": 0.270968239284613, "grad_norm": 4.240675926208496, "learning_rate": 8.553584613838243e-06, "loss": 1.0737, "step": 3515 }, { "epoch": 0.27104532839962997, "grad_norm": 3.890157699584961, "learning_rate": 8.552706247759527e-06, "loss": 0.9872, "step": 3516 }, { "epoch": 0.27112241751464694, "grad_norm": 3.4992425441741943, "learning_rate": 8.551827660187978e-06, "loss": 0.9768, "step": 3517 }, { "epoch": 0.2711995066296639, "grad_norm": 3.600581407546997, "learning_rate": 8.550948851178368e-06, "loss": 0.9799, "step": 3518 }, { "epoch": 0.2712765957446808, "grad_norm": 4.245118141174316, "learning_rate": 8.55006982078549e-06, "loss": 1.1133, "step": 3519 }, { "epoch": 0.2713536848596978, "grad_norm": 3.7988290786743164, "learning_rate": 8.549190569064144e-06, "loss": 0.9969, "step": 3520 }, { "epoch": 0.27143077397471477, "grad_norm": 4.244655132293701, "learning_rate": 8.548311096069148e-06, "loss": 1.1576, "step": 3521 }, { "epoch": 0.27150786308973174, "grad_norm": 3.901003360748291, "learning_rate": 8.547431401855333e-06, "loss": 1.0142, "step": 3522 }, { "epoch": 0.2715849522047487, "grad_norm": 3.8016793727874756, "learning_rate": 8.546551486477542e-06, "loss": 1.0619, "step": 3523 }, { "epoch": 0.2716620413197656, "grad_norm": 3.773573637008667, "learning_rate": 8.545671349990633e-06, "loss": 1.0336, "step": 3524 }, { "epoch": 0.2717391304347826, "grad_norm": 3.533284902572632, "learning_rate": 8.544790992449479e-06, "loss": 1.1181, "step": 3525 }, { "epoch": 0.27181621954979956, "grad_norm": 3.5303707122802734, "learning_rate": 8.543910413908967e-06, "loss": 0.9646, "step": 3526 }, { "epoch": 0.27189330866481654, "grad_norm": 3.93528413772583, "learning_rate": 8.543029614423994e-06, "loss": 1.0863, "step": 3527 }, { "epoch": 0.2719703977798335, "grad_norm": 3.518523931503296, "learning_rate": 8.542148594049475e-06, "loss": 1.0908, "step": 3528 }, { "epoch": 0.2720474868948504, "grad_norm": 3.6154558658599854, "learning_rate": 8.541267352840336e-06, "loss": 1.0267, "step": 3529 }, { "epoch": 0.2721245760098674, "grad_norm": 3.5488035678863525, "learning_rate": 8.54038589085152e-06, "loss": 1.0351, "step": 3530 }, { "epoch": 0.27220166512488436, "grad_norm": 3.697274923324585, "learning_rate": 8.539504208137977e-06, "loss": 1.0168, "step": 3531 }, { "epoch": 0.27227875423990133, "grad_norm": 3.5824666023254395, "learning_rate": 
8.538622304754679e-06, "loss": 1.0174, "step": 3532 }, { "epoch": 0.2723558433549183, "grad_norm": 3.9082562923431396, "learning_rate": 8.537740180756608e-06, "loss": 1.1437, "step": 3533 }, { "epoch": 0.2724329324699352, "grad_norm": 3.7677783966064453, "learning_rate": 8.536857836198759e-06, "loss": 0.9499, "step": 3534 }, { "epoch": 0.2725100215849522, "grad_norm": 3.8008973598480225, "learning_rate": 8.535975271136142e-06, "loss": 0.9116, "step": 3535 }, { "epoch": 0.27258711069996916, "grad_norm": 3.5162312984466553, "learning_rate": 8.53509248562378e-06, "loss": 0.9755, "step": 3536 }, { "epoch": 0.27266419981498613, "grad_norm": 3.512768030166626, "learning_rate": 8.534209479716708e-06, "loss": 0.9291, "step": 3537 }, { "epoch": 0.2727412889300031, "grad_norm": 3.6765835285186768, "learning_rate": 8.533326253469983e-06, "loss": 0.9966, "step": 3538 }, { "epoch": 0.27281837804502, "grad_norm": 3.5264675617218018, "learning_rate": 8.532442806938663e-06, "loss": 0.985, "step": 3539 }, { "epoch": 0.272895467160037, "grad_norm": 3.555009365081787, "learning_rate": 8.531559140177828e-06, "loss": 1.0493, "step": 3540 }, { "epoch": 0.27297255627505396, "grad_norm": 3.558711528778076, "learning_rate": 8.530675253242573e-06, "loss": 0.8952, "step": 3541 }, { "epoch": 0.2730496453900709, "grad_norm": 3.6628801822662354, "learning_rate": 8.529791146188003e-06, "loss": 1.0614, "step": 3542 }, { "epoch": 0.2731267345050879, "grad_norm": 3.692908525466919, "learning_rate": 8.528906819069234e-06, "loss": 1.0244, "step": 3543 }, { "epoch": 0.27320382362010487, "grad_norm": 3.419480085372925, "learning_rate": 8.528022271941404e-06, "loss": 0.9742, "step": 3544 }, { "epoch": 0.2732809127351218, "grad_norm": 3.549691915512085, "learning_rate": 8.527137504859654e-06, "loss": 0.9707, "step": 3545 }, { "epoch": 0.27335800185013875, "grad_norm": 3.792904853820801, "learning_rate": 8.52625251787915e-06, "loss": 1.008, "step": 3546 }, { "epoch": 0.2734350909651557, "grad_norm": 3.560368776321411, "learning_rate": 8.525367311055063e-06, "loss": 0.961, "step": 3547 }, { "epoch": 0.2735121800801727, "grad_norm": 3.6131019592285156, "learning_rate": 8.524481884442583e-06, "loss": 0.9666, "step": 3548 }, { "epoch": 0.27358926919518967, "grad_norm": 3.8992390632629395, "learning_rate": 8.523596238096913e-06, "loss": 1.0684, "step": 3549 }, { "epoch": 0.2736663583102066, "grad_norm": 3.5087990760803223, "learning_rate": 8.522710372073265e-06, "loss": 0.9931, "step": 3550 }, { "epoch": 0.27374344742522355, "grad_norm": 4.000070095062256, "learning_rate": 8.521824286426872e-06, "loss": 1.0817, "step": 3551 }, { "epoch": 0.2738205365402405, "grad_norm": 3.854365825653076, "learning_rate": 8.520937981212973e-06, "loss": 0.9886, "step": 3552 }, { "epoch": 0.2738976256552575, "grad_norm": 3.266382932662964, "learning_rate": 8.520051456486827e-06, "loss": 0.9731, "step": 3553 }, { "epoch": 0.27397471477027446, "grad_norm": 3.62668514251709, "learning_rate": 8.519164712303703e-06, "loss": 1.0892, "step": 3554 }, { "epoch": 0.2740518038852914, "grad_norm": 3.941357135772705, "learning_rate": 8.518277748718887e-06, "loss": 1.0447, "step": 3555 }, { "epoch": 0.27412889300030835, "grad_norm": 3.726266860961914, "learning_rate": 8.517390565787672e-06, "loss": 1.0465, "step": 3556 }, { "epoch": 0.2742059821153253, "grad_norm": 3.8830811977386475, "learning_rate": 8.516503163565374e-06, "loss": 1.0176, "step": 3557 }, { "epoch": 0.2742830712303423, "grad_norm": 3.832172393798828, "learning_rate": 8.515615542107317e-06, "loss": 
1.066, "step": 3558 }, { "epoch": 0.27436016034535926, "grad_norm": 3.768930196762085, "learning_rate": 8.514727701468837e-06, "loss": 1.2196, "step": 3559 }, { "epoch": 0.2744372494603762, "grad_norm": 3.7359375953674316, "learning_rate": 8.513839641705288e-06, "loss": 1.0866, "step": 3560 }, { "epoch": 0.27451433857539315, "grad_norm": 3.686213970184326, "learning_rate": 8.512951362872037e-06, "loss": 1.072, "step": 3561 }, { "epoch": 0.2745914276904101, "grad_norm": 3.735572338104248, "learning_rate": 8.512062865024463e-06, "loss": 0.9229, "step": 3562 }, { "epoch": 0.2746685168054271, "grad_norm": 3.594026803970337, "learning_rate": 8.511174148217958e-06, "loss": 1.0068, "step": 3563 }, { "epoch": 0.27474560592044406, "grad_norm": 3.5880775451660156, "learning_rate": 8.51028521250793e-06, "loss": 1.0385, "step": 3564 }, { "epoch": 0.274822695035461, "grad_norm": 3.8719637393951416, "learning_rate": 8.509396057949799e-06, "loss": 1.0011, "step": 3565 }, { "epoch": 0.27489978415047794, "grad_norm": 3.555837631225586, "learning_rate": 8.508506684598999e-06, "loss": 0.9061, "step": 3566 }, { "epoch": 0.2749768732654949, "grad_norm": 3.370540142059326, "learning_rate": 8.50761709251098e-06, "loss": 0.9201, "step": 3567 }, { "epoch": 0.2750539623805119, "grad_norm": 3.50242280960083, "learning_rate": 8.5067272817412e-06, "loss": 0.9497, "step": 3568 }, { "epoch": 0.27513105149552886, "grad_norm": 3.7081899642944336, "learning_rate": 8.505837252345135e-06, "loss": 0.9182, "step": 3569 }, { "epoch": 0.27520814061054577, "grad_norm": 3.7657527923583984, "learning_rate": 8.504947004378277e-06, "loss": 1.0016, "step": 3570 }, { "epoch": 0.27528522972556274, "grad_norm": 3.5126376152038574, "learning_rate": 8.504056537896123e-06, "loss": 0.93, "step": 3571 }, { "epoch": 0.2753623188405797, "grad_norm": 3.6629865169525146, "learning_rate": 8.503165852954193e-06, "loss": 1.0295, "step": 3572 }, { "epoch": 0.2754394079555967, "grad_norm": 3.717280149459839, "learning_rate": 8.502274949608018e-06, "loss": 1.0295, "step": 3573 }, { "epoch": 0.27551649707061365, "grad_norm": 4.10526180267334, "learning_rate": 8.501383827913137e-06, "loss": 1.1103, "step": 3574 }, { "epoch": 0.27559358618563057, "grad_norm": 3.4711949825286865, "learning_rate": 8.500492487925107e-06, "loss": 0.9718, "step": 3575 }, { "epoch": 0.27567067530064754, "grad_norm": 3.5179035663604736, "learning_rate": 8.499600929699501e-06, "loss": 1.0066, "step": 3576 }, { "epoch": 0.2757477644156645, "grad_norm": 3.881817579269409, "learning_rate": 8.498709153291901e-06, "loss": 1.0456, "step": 3577 }, { "epoch": 0.2758248535306815, "grad_norm": 4.031117916107178, "learning_rate": 8.497817158757906e-06, "loss": 1.0358, "step": 3578 }, { "epoch": 0.27590194264569845, "grad_norm": 4.566905498504639, "learning_rate": 8.49692494615313e-06, "loss": 1.0547, "step": 3579 }, { "epoch": 0.27597903176071537, "grad_norm": 3.682335138320923, "learning_rate": 8.49603251553319e-06, "loss": 1.0357, "step": 3580 }, { "epoch": 0.27605612087573234, "grad_norm": 3.5989487171173096, "learning_rate": 8.495139866953732e-06, "loss": 0.8995, "step": 3581 }, { "epoch": 0.2761332099907493, "grad_norm": 3.775329351425171, "learning_rate": 8.494247000470404e-06, "loss": 1.0149, "step": 3582 }, { "epoch": 0.2762102991057663, "grad_norm": 3.7653584480285645, "learning_rate": 8.493353916138873e-06, "loss": 1.0037, "step": 3583 }, { "epoch": 0.27628738822078325, "grad_norm": 4.354711532592773, "learning_rate": 8.492460614014816e-06, "loss": 1.0801, "step": 3584 }, { 
"epoch": 0.27636447733580016, "grad_norm": 3.7385101318359375, "learning_rate": 8.49156709415393e-06, "loss": 0.9805, "step": 3585 }, { "epoch": 0.27644156645081713, "grad_norm": 3.78240704536438, "learning_rate": 8.490673356611919e-06, "loss": 1.0614, "step": 3586 }, { "epoch": 0.2765186555658341, "grad_norm": 4.034103870391846, "learning_rate": 8.489779401444503e-06, "loss": 1.0574, "step": 3587 }, { "epoch": 0.2765957446808511, "grad_norm": 3.6883256435394287, "learning_rate": 8.488885228707413e-06, "loss": 1.165, "step": 3588 }, { "epoch": 0.27667283379586805, "grad_norm": 3.4828081130981445, "learning_rate": 8.4879908384564e-06, "loss": 0.924, "step": 3589 }, { "epoch": 0.27674992291088496, "grad_norm": 3.475923776626587, "learning_rate": 8.487096230747223e-06, "loss": 0.9768, "step": 3590 }, { "epoch": 0.27682701202590193, "grad_norm": 3.595651626586914, "learning_rate": 8.486201405635655e-06, "loss": 0.8966, "step": 3591 }, { "epoch": 0.2769041011409189, "grad_norm": 4.367059707641602, "learning_rate": 8.485306363177485e-06, "loss": 0.9618, "step": 3592 }, { "epoch": 0.27698119025593587, "grad_norm": 3.2522506713867188, "learning_rate": 8.484411103428516e-06, "loss": 0.9475, "step": 3593 }, { "epoch": 0.27705827937095284, "grad_norm": 3.634666919708252, "learning_rate": 8.48351562644456e-06, "loss": 1.0517, "step": 3594 }, { "epoch": 0.27713536848596976, "grad_norm": 3.7717621326446533, "learning_rate": 8.482619932281446e-06, "loss": 1.017, "step": 3595 }, { "epoch": 0.27721245760098673, "grad_norm": 3.5509636402130127, "learning_rate": 8.481724020995017e-06, "loss": 0.9433, "step": 3596 }, { "epoch": 0.2772895467160037, "grad_norm": 3.6704728603363037, "learning_rate": 8.480827892641125e-06, "loss": 0.991, "step": 3597 }, { "epoch": 0.27736663583102067, "grad_norm": 3.710653305053711, "learning_rate": 8.479931547275644e-06, "loss": 1.1041, "step": 3598 }, { "epoch": 0.27744372494603764, "grad_norm": 3.6124982833862305, "learning_rate": 8.479034984954454e-06, "loss": 0.9931, "step": 3599 }, { "epoch": 0.27752081406105455, "grad_norm": 3.4628562927246094, "learning_rate": 8.47813820573345e-06, "loss": 1.1006, "step": 3600 }, { "epoch": 0.2775979031760715, "grad_norm": 3.8903744220733643, "learning_rate": 8.477241209668543e-06, "loss": 1.072, "step": 3601 }, { "epoch": 0.2776749922910885, "grad_norm": 3.902669906616211, "learning_rate": 8.476343996815657e-06, "loss": 0.9923, "step": 3602 }, { "epoch": 0.27775208140610547, "grad_norm": 3.763310670852661, "learning_rate": 8.475446567230727e-06, "loss": 0.9965, "step": 3603 }, { "epoch": 0.27782917052112244, "grad_norm": 3.7086451053619385, "learning_rate": 8.4745489209697e-06, "loss": 1.0033, "step": 3604 }, { "epoch": 0.27790625963613935, "grad_norm": 3.9299726486206055, "learning_rate": 8.473651058088548e-06, "loss": 1.1179, "step": 3605 }, { "epoch": 0.2779833487511563, "grad_norm": 3.552938222885132, "learning_rate": 8.47275297864324e-06, "loss": 1.0242, "step": 3606 }, { "epoch": 0.2780604378661733, "grad_norm": 3.6222376823425293, "learning_rate": 8.47185468268977e-06, "loss": 1.0164, "step": 3607 }, { "epoch": 0.27813752698119026, "grad_norm": 4.27018928527832, "learning_rate": 8.470956170284141e-06, "loss": 1.0589, "step": 3608 }, { "epoch": 0.27821461609620723, "grad_norm": 3.718721628189087, "learning_rate": 8.470057441482374e-06, "loss": 0.9983, "step": 3609 }, { "epoch": 0.27829170521122415, "grad_norm": 3.726409673690796, "learning_rate": 8.469158496340496e-06, "loss": 1.028, "step": 3610 }, { "epoch": 0.2783687943262411, 
"grad_norm": 3.4664266109466553, "learning_rate": 8.46825933491455e-06, "loss": 0.9796, "step": 3611 }, { "epoch": 0.2784458834412581, "grad_norm": 3.7428462505340576, "learning_rate": 8.4673599572606e-06, "loss": 1.0337, "step": 3612 }, { "epoch": 0.27852297255627506, "grad_norm": 3.687169313430786, "learning_rate": 8.466460363434714e-06, "loss": 1.0132, "step": 3613 }, { "epoch": 0.27860006167129203, "grad_norm": 4.0733113288879395, "learning_rate": 8.465560553492978e-06, "loss": 0.9581, "step": 3614 }, { "epoch": 0.27867715078630895, "grad_norm": 3.739532709121704, "learning_rate": 8.46466052749149e-06, "loss": 0.9367, "step": 3615 }, { "epoch": 0.2787542399013259, "grad_norm": 3.9746146202087402, "learning_rate": 8.463760285486362e-06, "loss": 0.9157, "step": 3616 }, { "epoch": 0.2788313290163429, "grad_norm": 3.625314235687256, "learning_rate": 8.462859827533718e-06, "loss": 0.9241, "step": 3617 }, { "epoch": 0.27890841813135986, "grad_norm": 3.546565532684326, "learning_rate": 8.4619591536897e-06, "loss": 0.9287, "step": 3618 }, { "epoch": 0.27898550724637683, "grad_norm": 3.9717471599578857, "learning_rate": 8.46105826401046e-06, "loss": 1.0355, "step": 3619 }, { "epoch": 0.27906259636139374, "grad_norm": 3.866204261779785, "learning_rate": 8.46015715855216e-06, "loss": 1.0423, "step": 3620 }, { "epoch": 0.2791396854764107, "grad_norm": 3.634885549545288, "learning_rate": 8.459255837370984e-06, "loss": 1.0229, "step": 3621 }, { "epoch": 0.2792167745914277, "grad_norm": 3.85805344581604, "learning_rate": 8.45835430052312e-06, "loss": 0.9998, "step": 3622 }, { "epoch": 0.27929386370644466, "grad_norm": 3.9002537727355957, "learning_rate": 8.457452548064778e-06, "loss": 0.9979, "step": 3623 }, { "epoch": 0.2793709528214616, "grad_norm": 3.3640360832214355, "learning_rate": 8.456550580052177e-06, "loss": 0.8977, "step": 3624 }, { "epoch": 0.27944804193647854, "grad_norm": 3.474541664123535, "learning_rate": 8.455648396541548e-06, "loss": 1.0045, "step": 3625 }, { "epoch": 0.2795251310514955, "grad_norm": 3.5307717323303223, "learning_rate": 8.454745997589139e-06, "loss": 0.9458, "step": 3626 }, { "epoch": 0.2796022201665125, "grad_norm": 3.8024139404296875, "learning_rate": 8.45384338325121e-06, "loss": 1.0375, "step": 3627 }, { "epoch": 0.27967930928152945, "grad_norm": 3.663609027862549, "learning_rate": 8.452940553584032e-06, "loss": 0.8606, "step": 3628 }, { "epoch": 0.2797563983965464, "grad_norm": 3.6172802448272705, "learning_rate": 8.452037508643897e-06, "loss": 0.9782, "step": 3629 }, { "epoch": 0.2798334875115634, "grad_norm": 3.8547043800354004, "learning_rate": 8.4511342484871e-06, "loss": 0.9364, "step": 3630 }, { "epoch": 0.2799105766265803, "grad_norm": 3.862354040145874, "learning_rate": 8.450230773169958e-06, "loss": 1.0593, "step": 3631 }, { "epoch": 0.2799876657415973, "grad_norm": 3.5931904315948486, "learning_rate": 8.449327082748794e-06, "loss": 1.0177, "step": 3632 }, { "epoch": 0.28006475485661425, "grad_norm": 3.79412579536438, "learning_rate": 8.448423177279954e-06, "loss": 1.0407, "step": 3633 }, { "epoch": 0.2801418439716312, "grad_norm": 3.9364240169525146, "learning_rate": 8.447519056819787e-06, "loss": 1.0459, "step": 3634 }, { "epoch": 0.2802189330866482, "grad_norm": 3.5306715965270996, "learning_rate": 8.446614721424661e-06, "loss": 1.0045, "step": 3635 }, { "epoch": 0.2802960222016651, "grad_norm": 4.1420111656188965, "learning_rate": 8.445710171150957e-06, "loss": 1.1036, "step": 3636 }, { "epoch": 0.2803731113166821, "grad_norm": 3.604893684387207, 
"learning_rate": 8.444805406055072e-06, "loss": 0.9265, "step": 3637 }, { "epoch": 0.28045020043169905, "grad_norm": 3.910055637359619, "learning_rate": 8.44390042619341e-06, "loss": 1.0674, "step": 3638 }, { "epoch": 0.280527289546716, "grad_norm": 4.729837894439697, "learning_rate": 8.442995231622393e-06, "loss": 1.0476, "step": 3639 }, { "epoch": 0.280604378661733, "grad_norm": 3.926619529724121, "learning_rate": 8.442089822398456e-06, "loss": 0.9571, "step": 3640 }, { "epoch": 0.2806814677767499, "grad_norm": 3.6743786334991455, "learning_rate": 8.441184198578044e-06, "loss": 1.0757, "step": 3641 }, { "epoch": 0.2807585568917669, "grad_norm": 3.5217857360839844, "learning_rate": 8.44027836021762e-06, "loss": 1.0539, "step": 3642 }, { "epoch": 0.28083564600678385, "grad_norm": 3.6347432136535645, "learning_rate": 8.439372307373658e-06, "loss": 0.9301, "step": 3643 }, { "epoch": 0.2809127351218008, "grad_norm": 4.677338123321533, "learning_rate": 8.438466040102647e-06, "loss": 0.9828, "step": 3644 }, { "epoch": 0.2809898242368178, "grad_norm": 3.5795114040374756, "learning_rate": 8.437559558461085e-06, "loss": 0.8683, "step": 3645 }, { "epoch": 0.2810669133518347, "grad_norm": 3.54679799079895, "learning_rate": 8.436652862505488e-06, "loss": 1.0562, "step": 3646 }, { "epoch": 0.2811440024668517, "grad_norm": 4.047598838806152, "learning_rate": 8.435745952292384e-06, "loss": 0.981, "step": 3647 }, { "epoch": 0.28122109158186864, "grad_norm": 3.662014961242676, "learning_rate": 8.434838827878315e-06, "loss": 0.9972, "step": 3648 }, { "epoch": 0.2812981806968856, "grad_norm": 3.410169839859009, "learning_rate": 8.433931489319835e-06, "loss": 0.9462, "step": 3649 }, { "epoch": 0.2813752698119026, "grad_norm": 3.725191354751587, "learning_rate": 8.43302393667351e-06, "loss": 0.9657, "step": 3650 }, { "epoch": 0.2814523589269195, "grad_norm": 3.510698080062866, "learning_rate": 8.432116169995923e-06, "loss": 1.0496, "step": 3651 }, { "epoch": 0.28152944804193647, "grad_norm": 3.6888866424560547, "learning_rate": 8.43120818934367e-06, "loss": 0.9789, "step": 3652 }, { "epoch": 0.28160653715695344, "grad_norm": 3.4661200046539307, "learning_rate": 8.430299994773354e-06, "loss": 0.9091, "step": 3653 }, { "epoch": 0.2816836262719704, "grad_norm": 3.9719278812408447, "learning_rate": 8.429391586341602e-06, "loss": 1.0466, "step": 3654 }, { "epoch": 0.2817607153869874, "grad_norm": 3.836193323135376, "learning_rate": 8.428482964105043e-06, "loss": 1.0741, "step": 3655 }, { "epoch": 0.2818378045020043, "grad_norm": 3.6839404106140137, "learning_rate": 8.427574128120331e-06, "loss": 0.9394, "step": 3656 }, { "epoch": 0.28191489361702127, "grad_norm": 3.4464738368988037, "learning_rate": 8.42666507844412e-06, "loss": 1.0155, "step": 3657 }, { "epoch": 0.28199198273203824, "grad_norm": 4.588211536407471, "learning_rate": 8.425755815133092e-06, "loss": 1.1744, "step": 3658 }, { "epoch": 0.2820690718470552, "grad_norm": 4.032186508178711, "learning_rate": 8.42484633824393e-06, "loss": 1.0072, "step": 3659 }, { "epoch": 0.2821461609620722, "grad_norm": 3.7464022636413574, "learning_rate": 8.423936647833337e-06, "loss": 0.9378, "step": 3660 }, { "epoch": 0.2822232500770891, "grad_norm": 3.7981278896331787, "learning_rate": 8.423026743958028e-06, "loss": 1.0546, "step": 3661 }, { "epoch": 0.28230033919210606, "grad_norm": 4.554448127746582, "learning_rate": 8.422116626674728e-06, "loss": 1.037, "step": 3662 }, { "epoch": 0.28237742830712304, "grad_norm": 3.7701575756073, "learning_rate": 
8.42120629604018e-06, "loss": 1.0272, "step": 3663 }, { "epoch": 0.28245451742214, "grad_norm": 3.8832554817199707, "learning_rate": 8.420295752111138e-06, "loss": 1.0422, "step": 3664 }, { "epoch": 0.282531606537157, "grad_norm": 4.044317245483398, "learning_rate": 8.41938499494437e-06, "loss": 1.0457, "step": 3665 }, { "epoch": 0.2826086956521739, "grad_norm": 3.7039005756378174, "learning_rate": 8.418474024596659e-06, "loss": 0.9673, "step": 3666 }, { "epoch": 0.28268578476719086, "grad_norm": 3.4527359008789062, "learning_rate": 8.417562841124797e-06, "loss": 0.9998, "step": 3667 }, { "epoch": 0.28276287388220783, "grad_norm": 4.247575759887695, "learning_rate": 8.416651444585591e-06, "loss": 1.0611, "step": 3668 }, { "epoch": 0.2828399629972248, "grad_norm": 3.4777634143829346, "learning_rate": 8.415739835035864e-06, "loss": 0.9773, "step": 3669 }, { "epoch": 0.2829170521122418, "grad_norm": 4.208752155303955, "learning_rate": 8.414828012532446e-06, "loss": 0.9631, "step": 3670 }, { "epoch": 0.2829941412272587, "grad_norm": 3.4110782146453857, "learning_rate": 8.41391597713219e-06, "loss": 1.0187, "step": 3671 }, { "epoch": 0.28307123034227566, "grad_norm": 3.7919259071350098, "learning_rate": 8.413003728891953e-06, "loss": 1.1607, "step": 3672 }, { "epoch": 0.28314831945729263, "grad_norm": 3.560163736343384, "learning_rate": 8.41209126786861e-06, "loss": 0.9773, "step": 3673 }, { "epoch": 0.2832254085723096, "grad_norm": 3.5800511837005615, "learning_rate": 8.411178594119046e-06, "loss": 0.8893, "step": 3674 }, { "epoch": 0.28330249768732657, "grad_norm": 4.5871901512146, "learning_rate": 8.410265707700167e-06, "loss": 1.1218, "step": 3675 }, { "epoch": 0.2833795868023435, "grad_norm": 3.5251963138580322, "learning_rate": 8.409352608668882e-06, "loss": 1.1023, "step": 3676 }, { "epoch": 0.28345667591736046, "grad_norm": 3.715285539627075, "learning_rate": 8.408439297082118e-06, "loss": 0.9962, "step": 3677 }, { "epoch": 0.2835337650323774, "grad_norm": 3.63982892036438, "learning_rate": 8.407525772996818e-06, "loss": 1.043, "step": 3678 }, { "epoch": 0.2836108541473944, "grad_norm": 4.022475242614746, "learning_rate": 8.406612036469935e-06, "loss": 1.2085, "step": 3679 }, { "epoch": 0.28368794326241137, "grad_norm": 3.487618923187256, "learning_rate": 8.405698087558432e-06, "loss": 1.0158, "step": 3680 }, { "epoch": 0.2837650323774283, "grad_norm": 3.937352180480957, "learning_rate": 8.404783926319295e-06, "loss": 1.0598, "step": 3681 }, { "epoch": 0.28384212149244525, "grad_norm": 3.6437175273895264, "learning_rate": 8.403869552809512e-06, "loss": 0.9678, "step": 3682 }, { "epoch": 0.2839192106074622, "grad_norm": 4.383546829223633, "learning_rate": 8.402954967086093e-06, "loss": 1.051, "step": 3683 }, { "epoch": 0.2839962997224792, "grad_norm": 4.169625282287598, "learning_rate": 8.402040169206054e-06, "loss": 1.0238, "step": 3684 }, { "epoch": 0.28407338883749617, "grad_norm": 3.8336832523345947, "learning_rate": 8.40112515922643e-06, "loss": 0.9547, "step": 3685 }, { "epoch": 0.2841504779525131, "grad_norm": 3.8428425788879395, "learning_rate": 8.40020993720427e-06, "loss": 0.9351, "step": 3686 }, { "epoch": 0.28422756706753005, "grad_norm": 3.674729108810425, "learning_rate": 8.399294503196629e-06, "loss": 1.0025, "step": 3687 }, { "epoch": 0.284304656182547, "grad_norm": 3.847797155380249, "learning_rate": 8.398378857260581e-06, "loss": 1.0309, "step": 3688 }, { "epoch": 0.284381745297564, "grad_norm": 3.6674485206604004, "learning_rate": 8.397462999453212e-06, "loss": 0.9968, 
"step": 3689 }, { "epoch": 0.28445883441258096, "grad_norm": 4.155699253082275, "learning_rate": 8.39654692983162e-06, "loss": 1.1423, "step": 3690 }, { "epoch": 0.2845359235275979, "grad_norm": 3.99698543548584, "learning_rate": 8.395630648452919e-06, "loss": 1.0867, "step": 3691 }, { "epoch": 0.28461301264261485, "grad_norm": 3.8566694259643555, "learning_rate": 8.394714155374234e-06, "loss": 0.9227, "step": 3692 }, { "epoch": 0.2846901017576318, "grad_norm": 3.5863726139068604, "learning_rate": 8.393797450652701e-06, "loss": 0.9605, "step": 3693 }, { "epoch": 0.2847671908726488, "grad_norm": 3.88511323928833, "learning_rate": 8.392880534345477e-06, "loss": 0.9894, "step": 3694 }, { "epoch": 0.28484427998766576, "grad_norm": 3.3645994663238525, "learning_rate": 8.391963406509721e-06, "loss": 1.0001, "step": 3695 }, { "epoch": 0.2849213691026827, "grad_norm": 3.5766923427581787, "learning_rate": 8.391046067202617e-06, "loss": 0.9889, "step": 3696 }, { "epoch": 0.28499845821769965, "grad_norm": 4.069863796234131, "learning_rate": 8.390128516481351e-06, "loss": 0.9495, "step": 3697 }, { "epoch": 0.2850755473327166, "grad_norm": 3.9088964462280273, "learning_rate": 8.389210754403132e-06, "loss": 1.0693, "step": 3698 }, { "epoch": 0.2851526364477336, "grad_norm": 4.604135990142822, "learning_rate": 8.388292781025173e-06, "loss": 1.1569, "step": 3699 }, { "epoch": 0.28522972556275056, "grad_norm": 3.65946364402771, "learning_rate": 8.38737459640471e-06, "loss": 0.9248, "step": 3700 }, { "epoch": 0.2853068146777675, "grad_norm": 3.6877028942108154, "learning_rate": 8.386456200598982e-06, "loss": 1.1605, "step": 3701 }, { "epoch": 0.28538390379278444, "grad_norm": 3.4884543418884277, "learning_rate": 8.38553759366525e-06, "loss": 1.0238, "step": 3702 }, { "epoch": 0.2854609929078014, "grad_norm": 3.4332854747772217, "learning_rate": 8.384618775660784e-06, "loss": 1.0155, "step": 3703 }, { "epoch": 0.2855380820228184, "grad_norm": 4.001807689666748, "learning_rate": 8.383699746642866e-06, "loss": 1.1088, "step": 3704 }, { "epoch": 0.28561517113783536, "grad_norm": 3.441525459289551, "learning_rate": 8.382780506668792e-06, "loss": 0.9096, "step": 3705 }, { "epoch": 0.28569226025285227, "grad_norm": 3.314866542816162, "learning_rate": 8.381861055795876e-06, "loss": 1.0886, "step": 3706 }, { "epoch": 0.28576934936786924, "grad_norm": 4.307245254516602, "learning_rate": 8.380941394081437e-06, "loss": 1.0525, "step": 3707 }, { "epoch": 0.2858464384828862, "grad_norm": 3.5689730644226074, "learning_rate": 8.380021521582813e-06, "loss": 1.0542, "step": 3708 }, { "epoch": 0.2859235275979032, "grad_norm": 3.881000518798828, "learning_rate": 8.379101438357352e-06, "loss": 0.9946, "step": 3709 }, { "epoch": 0.28600061671292015, "grad_norm": 4.110035419464111, "learning_rate": 8.378181144462418e-06, "loss": 1.0302, "step": 3710 }, { "epoch": 0.28607770582793707, "grad_norm": 3.9843645095825195, "learning_rate": 8.377260639955385e-06, "loss": 1.026, "step": 3711 }, { "epoch": 0.28615479494295404, "grad_norm": 3.5178589820861816, "learning_rate": 8.376339924893642e-06, "loss": 0.9419, "step": 3712 }, { "epoch": 0.286231884057971, "grad_norm": 3.800537109375, "learning_rate": 8.375418999334591e-06, "loss": 0.9474, "step": 3713 }, { "epoch": 0.286308973172988, "grad_norm": 3.798588991165161, "learning_rate": 8.374497863335649e-06, "loss": 1.0826, "step": 3714 }, { "epoch": 0.28638606228800495, "grad_norm": 3.9119489192962646, "learning_rate": 8.37357651695424e-06, "loss": 1.0957, "step": 3715 }, { "epoch": 
0.2864631514030219, "grad_norm": 3.5828166007995605, "learning_rate": 8.37265496024781e-06, "loss": 0.951, "step": 3716 }, { "epoch": 0.28654024051803884, "grad_norm": 3.7555758953094482, "learning_rate": 8.371733193273808e-06, "loss": 0.9873, "step": 3717 }, { "epoch": 0.2866173296330558, "grad_norm": 3.638547897338867, "learning_rate": 8.370811216089705e-06, "loss": 1.1217, "step": 3718 }, { "epoch": 0.2866944187480728, "grad_norm": 3.6661555767059326, "learning_rate": 8.36988902875298e-06, "loss": 1.0319, "step": 3719 }, { "epoch": 0.28677150786308975, "grad_norm": 3.355048179626465, "learning_rate": 8.36896663132113e-06, "loss": 0.9512, "step": 3720 }, { "epoch": 0.2868485969781067, "grad_norm": 3.8450980186462402, "learning_rate": 8.368044023851656e-06, "loss": 1.0523, "step": 3721 }, { "epoch": 0.28692568609312363, "grad_norm": 3.896583318710327, "learning_rate": 8.36712120640208e-06, "loss": 1.0813, "step": 3722 }, { "epoch": 0.2870027752081406, "grad_norm": 3.890866279602051, "learning_rate": 8.366198179029937e-06, "loss": 1.0645, "step": 3723 }, { "epoch": 0.2870798643231576, "grad_norm": 3.7487337589263916, "learning_rate": 8.365274941792771e-06, "loss": 0.9914, "step": 3724 }, { "epoch": 0.28715695343817454, "grad_norm": 3.695530652999878, "learning_rate": 8.364351494748141e-06, "loss": 1.011, "step": 3725 }, { "epoch": 0.2872340425531915, "grad_norm": 3.758422613143921, "learning_rate": 8.363427837953622e-06, "loss": 1.0366, "step": 3726 }, { "epoch": 0.28731113166820843, "grad_norm": 4.224013328552246, "learning_rate": 8.362503971466795e-06, "loss": 1.0637, "step": 3727 }, { "epoch": 0.2873882207832254, "grad_norm": 4.0081305503845215, "learning_rate": 8.361579895345263e-06, "loss": 1.133, "step": 3728 }, { "epoch": 0.28746530989824237, "grad_norm": 3.7337818145751953, "learning_rate": 8.36065560964663e-06, "loss": 0.9504, "step": 3729 }, { "epoch": 0.28754239901325934, "grad_norm": 3.62839412689209, "learning_rate": 8.359731114428529e-06, "loss": 1.093, "step": 3730 }, { "epoch": 0.2876194881282763, "grad_norm": 3.5839309692382812, "learning_rate": 8.358806409748592e-06, "loss": 1.0897, "step": 3731 }, { "epoch": 0.28769657724329323, "grad_norm": 3.644613265991211, "learning_rate": 8.35788149566447e-06, "loss": 1.0344, "step": 3732 }, { "epoch": 0.2877736663583102, "grad_norm": 3.906480073928833, "learning_rate": 8.356956372233829e-06, "loss": 1.092, "step": 3733 }, { "epoch": 0.28785075547332717, "grad_norm": 3.4734041690826416, "learning_rate": 8.356031039514344e-06, "loss": 1.0349, "step": 3734 }, { "epoch": 0.28792784458834414, "grad_norm": 3.4001238346099854, "learning_rate": 8.355105497563705e-06, "loss": 0.9855, "step": 3735 }, { "epoch": 0.2880049337033611, "grad_norm": 3.7522032260894775, "learning_rate": 8.354179746439614e-06, "loss": 1.0141, "step": 3736 }, { "epoch": 0.288082022818378, "grad_norm": 3.7555811405181885, "learning_rate": 8.353253786199788e-06, "loss": 1.0243, "step": 3737 }, { "epoch": 0.288159111933395, "grad_norm": 3.771195411682129, "learning_rate": 8.352327616901956e-06, "loss": 0.9578, "step": 3738 }, { "epoch": 0.28823620104841197, "grad_norm": 3.630093574523926, "learning_rate": 8.35140123860386e-06, "loss": 1.0626, "step": 3739 }, { "epoch": 0.28831329016342894, "grad_norm": 3.6648457050323486, "learning_rate": 8.350474651363254e-06, "loss": 1.1257, "step": 3740 }, { "epoch": 0.2883903792784459, "grad_norm": 3.7269270420074463, "learning_rate": 8.349547855237904e-06, "loss": 1.0953, "step": 3741 }, { "epoch": 0.2884674683934628, "grad_norm": 
3.8183059692382812, "learning_rate": 8.348620850285594e-06, "loss": 1.1094, "step": 3742 }, { "epoch": 0.2885445575084798, "grad_norm": 3.764166831970215, "learning_rate": 8.347693636564119e-06, "loss": 0.9972, "step": 3743 }, { "epoch": 0.28862164662349676, "grad_norm": 3.2361299991607666, "learning_rate": 8.34676621413128e-06, "loss": 0.8723, "step": 3744 }, { "epoch": 0.28869873573851373, "grad_norm": 4.178921699523926, "learning_rate": 8.345838583044903e-06, "loss": 0.8852, "step": 3745 }, { "epoch": 0.2887758248535307, "grad_norm": 4.049960136413574, "learning_rate": 8.344910743362819e-06, "loss": 1.1337, "step": 3746 }, { "epoch": 0.2888529139685476, "grad_norm": 4.307690143585205, "learning_rate": 8.343982695142873e-06, "loss": 1.0333, "step": 3747 }, { "epoch": 0.2889300030835646, "grad_norm": 3.709958791732788, "learning_rate": 8.343054438442926e-06, "loss": 1.0488, "step": 3748 }, { "epoch": 0.28900709219858156, "grad_norm": 3.61205792427063, "learning_rate": 8.342125973320848e-06, "loss": 0.9665, "step": 3749 }, { "epoch": 0.28908418131359853, "grad_norm": 3.7218470573425293, "learning_rate": 8.341197299834524e-06, "loss": 1.0409, "step": 3750 }, { "epoch": 0.2891612704286155, "grad_norm": 3.4968018531799316, "learning_rate": 8.340268418041855e-06, "loss": 0.9222, "step": 3751 }, { "epoch": 0.2892383595436324, "grad_norm": 3.451004981994629, "learning_rate": 8.339339328000749e-06, "loss": 1.0035, "step": 3752 }, { "epoch": 0.2893154486586494, "grad_norm": 3.4331042766571045, "learning_rate": 8.338410029769133e-06, "loss": 0.9606, "step": 3753 }, { "epoch": 0.28939253777366636, "grad_norm": 3.4850504398345947, "learning_rate": 8.337480523404938e-06, "loss": 0.9549, "step": 3754 }, { "epoch": 0.28946962688868333, "grad_norm": 3.5593273639678955, "learning_rate": 8.33655080896612e-06, "loss": 1.0124, "step": 3755 }, { "epoch": 0.2895467160037003, "grad_norm": 3.5251691341400146, "learning_rate": 8.335620886510637e-06, "loss": 0.9167, "step": 3756 }, { "epoch": 0.2896238051187172, "grad_norm": 3.627453088760376, "learning_rate": 8.33469075609647e-06, "loss": 0.9891, "step": 3757 }, { "epoch": 0.2897008942337342, "grad_norm": 3.599273204803467, "learning_rate": 8.333760417781605e-06, "loss": 1.0972, "step": 3758 }, { "epoch": 0.28977798334875116, "grad_norm": 3.6854488849639893, "learning_rate": 8.332829871624042e-06, "loss": 1.1368, "step": 3759 }, { "epoch": 0.2898550724637681, "grad_norm": 3.5526113510131836, "learning_rate": 8.331899117681799e-06, "loss": 0.9347, "step": 3760 }, { "epoch": 0.2899321615787851, "grad_norm": 3.6977930068969727, "learning_rate": 8.330968156012902e-06, "loss": 0.9981, "step": 3761 }, { "epoch": 0.290009250693802, "grad_norm": 3.659409761428833, "learning_rate": 8.330036986675392e-06, "loss": 1.023, "step": 3762 }, { "epoch": 0.290086339808819, "grad_norm": 4.15533971786499, "learning_rate": 8.329105609727323e-06, "loss": 1.018, "step": 3763 }, { "epoch": 0.29016342892383595, "grad_norm": 3.824834108352661, "learning_rate": 8.32817402522676e-06, "loss": 0.8737, "step": 3764 }, { "epoch": 0.2902405180388529, "grad_norm": 3.59409499168396, "learning_rate": 8.327242233231784e-06, "loss": 1.0568, "step": 3765 }, { "epoch": 0.2903176071538699, "grad_norm": 3.5583786964416504, "learning_rate": 8.326310233800488e-06, "loss": 0.9326, "step": 3766 }, { "epoch": 0.2903946962688868, "grad_norm": 3.7277417182922363, "learning_rate": 8.325378026990976e-06, "loss": 0.972, "step": 3767 }, { "epoch": 0.2904717853839038, "grad_norm": 3.8586301803588867, 
"learning_rate": 8.324445612861367e-06, "loss": 1.0652, "step": 3768 }, { "epoch": 0.29054887449892075, "grad_norm": 3.91520357131958, "learning_rate": 8.32351299146979e-06, "loss": 0.9839, "step": 3769 }, { "epoch": 0.2906259636139377, "grad_norm": 3.715453863143921, "learning_rate": 8.322580162874393e-06, "loss": 1.1271, "step": 3770 }, { "epoch": 0.2907030527289547, "grad_norm": 3.917518138885498, "learning_rate": 8.321647127133327e-06, "loss": 1.0822, "step": 3771 }, { "epoch": 0.2907801418439716, "grad_norm": 4.15562629699707, "learning_rate": 8.320713884304769e-06, "loss": 0.9178, "step": 3772 }, { "epoch": 0.2908572309589886, "grad_norm": 3.6164376735687256, "learning_rate": 8.3197804344469e-06, "loss": 0.9087, "step": 3773 }, { "epoch": 0.29093432007400555, "grad_norm": 3.697296380996704, "learning_rate": 8.318846777617913e-06, "loss": 1.0643, "step": 3774 }, { "epoch": 0.2910114091890225, "grad_norm": 3.5980827808380127, "learning_rate": 8.317912913876019e-06, "loss": 1.009, "step": 3775 }, { "epoch": 0.2910884983040395, "grad_norm": 3.787522077560425, "learning_rate": 8.316978843279438e-06, "loss": 1.124, "step": 3776 }, { "epoch": 0.2911655874190564, "grad_norm": 3.599940299987793, "learning_rate": 8.316044565886405e-06, "loss": 1.0432, "step": 3777 }, { "epoch": 0.2912426765340734, "grad_norm": 3.8612630367279053, "learning_rate": 8.315110081755166e-06, "loss": 0.9916, "step": 3778 }, { "epoch": 0.29131976564909035, "grad_norm": 3.687645673751831, "learning_rate": 8.314175390943987e-06, "loss": 1.0088, "step": 3779 }, { "epoch": 0.2913968547641073, "grad_norm": 3.6906330585479736, "learning_rate": 8.313240493511132e-06, "loss": 0.9998, "step": 3780 }, { "epoch": 0.2914739438791243, "grad_norm": 3.6079602241516113, "learning_rate": 8.312305389514894e-06, "loss": 1.0303, "step": 3781 }, { "epoch": 0.2915510329941412, "grad_norm": 3.642810583114624, "learning_rate": 8.31137007901357e-06, "loss": 0.9044, "step": 3782 }, { "epoch": 0.29162812210915817, "grad_norm": 3.4344608783721924, "learning_rate": 8.310434562065472e-06, "loss": 0.9379, "step": 3783 }, { "epoch": 0.29170521122417514, "grad_norm": 3.8744451999664307, "learning_rate": 8.309498838728924e-06, "loss": 1.1047, "step": 3784 }, { "epoch": 0.2917823003391921, "grad_norm": 3.5419445037841797, "learning_rate": 8.308562909062264e-06, "loss": 1.0705, "step": 3785 }, { "epoch": 0.2918593894542091, "grad_norm": 3.666670560836792, "learning_rate": 8.307626773123842e-06, "loss": 1.0101, "step": 3786 }, { "epoch": 0.291936478569226, "grad_norm": 3.46775221824646, "learning_rate": 8.306690430972023e-06, "loss": 0.983, "step": 3787 }, { "epoch": 0.29201356768424297, "grad_norm": 3.711484909057617, "learning_rate": 8.305753882665178e-06, "loss": 0.97, "step": 3788 }, { "epoch": 0.29209065679925994, "grad_norm": 3.968447685241699, "learning_rate": 8.304817128261702e-06, "loss": 1.0543, "step": 3789 }, { "epoch": 0.2921677459142769, "grad_norm": 3.9683480262756348, "learning_rate": 8.303880167819994e-06, "loss": 0.97, "step": 3790 }, { "epoch": 0.2922448350292939, "grad_norm": 3.354612112045288, "learning_rate": 8.302943001398466e-06, "loss": 1.0434, "step": 3791 }, { "epoch": 0.2923219241443108, "grad_norm": 4.114500999450684, "learning_rate": 8.302005629055549e-06, "loss": 1.0238, "step": 3792 }, { "epoch": 0.29239901325932777, "grad_norm": 3.511687994003296, "learning_rate": 8.301068050849685e-06, "loss": 0.9847, "step": 3793 }, { "epoch": 0.29247610237434474, "grad_norm": 3.513735294342041, "learning_rate": 8.300130266839323e-06, 
"loss": 0.9874, "step": 3794 }, { "epoch": 0.2925531914893617, "grad_norm": 3.9487833976745605, "learning_rate": 8.29919227708293e-06, "loss": 1.0424, "step": 3795 }, { "epoch": 0.2926302806043787, "grad_norm": 3.702522039413452, "learning_rate": 8.298254081638988e-06, "loss": 0.9582, "step": 3796 }, { "epoch": 0.2927073697193956, "grad_norm": 4.400388240814209, "learning_rate": 8.297315680565984e-06, "loss": 1.0504, "step": 3797 }, { "epoch": 0.29278445883441256, "grad_norm": 3.7499802112579346, "learning_rate": 8.296377073922427e-06, "loss": 1.0402, "step": 3798 }, { "epoch": 0.29286154794942953, "grad_norm": 3.343437433242798, "learning_rate": 8.295438261766829e-06, "loss": 0.8718, "step": 3799 }, { "epoch": 0.2929386370644465, "grad_norm": 3.8932011127471924, "learning_rate": 8.294499244157724e-06, "loss": 1.0511, "step": 3800 }, { "epoch": 0.2930157261794635, "grad_norm": 3.75187087059021, "learning_rate": 8.293560021153652e-06, "loss": 0.9994, "step": 3801 }, { "epoch": 0.29309281529448045, "grad_norm": 4.034709930419922, "learning_rate": 8.292620592813173e-06, "loss": 1.0376, "step": 3802 }, { "epoch": 0.29316990440949736, "grad_norm": 3.411324977874756, "learning_rate": 8.291680959194852e-06, "loss": 0.9321, "step": 3803 }, { "epoch": 0.29324699352451433, "grad_norm": 3.779232978820801, "learning_rate": 8.29074112035727e-06, "loss": 1.0499, "step": 3804 }, { "epoch": 0.2933240826395313, "grad_norm": 4.236926555633545, "learning_rate": 8.289801076359025e-06, "loss": 1.0988, "step": 3805 }, { "epoch": 0.2934011717545483, "grad_norm": 3.3468666076660156, "learning_rate": 8.28886082725872e-06, "loss": 0.9899, "step": 3806 }, { "epoch": 0.29347826086956524, "grad_norm": 3.675314426422119, "learning_rate": 8.287920373114976e-06, "loss": 0.9013, "step": 3807 }, { "epoch": 0.29355534998458216, "grad_norm": 4.145412445068359, "learning_rate": 8.286979713986426e-06, "loss": 1.0496, "step": 3808 }, { "epoch": 0.29363243909959913, "grad_norm": 4.000509738922119, "learning_rate": 8.286038849931713e-06, "loss": 1.0509, "step": 3809 }, { "epoch": 0.2937095282146161, "grad_norm": 3.838998794555664, "learning_rate": 8.285097781009497e-06, "loss": 0.9234, "step": 3810 }, { "epoch": 0.29378661732963307, "grad_norm": 3.82969069480896, "learning_rate": 8.284156507278448e-06, "loss": 1.0399, "step": 3811 }, { "epoch": 0.29386370644465004, "grad_norm": 3.689297914505005, "learning_rate": 8.283215028797252e-06, "loss": 0.886, "step": 3812 }, { "epoch": 0.29394079555966696, "grad_norm": 3.5808119773864746, "learning_rate": 8.2822733456246e-06, "loss": 1.0377, "step": 3813 }, { "epoch": 0.2940178846746839, "grad_norm": 4.0584821701049805, "learning_rate": 8.281331457819204e-06, "loss": 1.1038, "step": 3814 }, { "epoch": 0.2940949737897009, "grad_norm": 4.134930610656738, "learning_rate": 8.28038936543979e-06, "loss": 0.9839, "step": 3815 }, { "epoch": 0.29417206290471787, "grad_norm": 3.6218936443328857, "learning_rate": 8.279447068545085e-06, "loss": 1.027, "step": 3816 }, { "epoch": 0.29424915201973484, "grad_norm": 4.330170154571533, "learning_rate": 8.27850456719384e-06, "loss": 1.103, "step": 3817 }, { "epoch": 0.29432624113475175, "grad_norm": 3.6379551887512207, "learning_rate": 8.277561861444818e-06, "loss": 1.0382, "step": 3818 }, { "epoch": 0.2944033302497687, "grad_norm": 3.776359796524048, "learning_rate": 8.276618951356787e-06, "loss": 1.0924, "step": 3819 }, { "epoch": 0.2944804193647857, "grad_norm": 3.7215919494628906, "learning_rate": 8.275675836988535e-06, "loss": 1.1014, "step": 3820 }, { 
"epoch": 0.29455750847980267, "grad_norm": 3.9651782512664795, "learning_rate": 8.27473251839886e-06, "loss": 1.0527, "step": 3821 }, { "epoch": 0.29463459759481964, "grad_norm": 3.5446691513061523, "learning_rate": 8.273788995646571e-06, "loss": 0.9641, "step": 3822 }, { "epoch": 0.29471168670983655, "grad_norm": 3.6998002529144287, "learning_rate": 8.272845268790494e-06, "loss": 1.1796, "step": 3823 }, { "epoch": 0.2947887758248535, "grad_norm": 3.6286091804504395, "learning_rate": 8.271901337889468e-06, "loss": 1.0408, "step": 3824 }, { "epoch": 0.2948658649398705, "grad_norm": 3.6755175590515137, "learning_rate": 8.270957203002337e-06, "loss": 0.9034, "step": 3825 }, { "epoch": 0.29494295405488746, "grad_norm": 3.6150128841400146, "learning_rate": 8.270012864187965e-06, "loss": 0.9529, "step": 3826 }, { "epoch": 0.29502004316990443, "grad_norm": 3.8087635040283203, "learning_rate": 8.269068321505225e-06, "loss": 0.9891, "step": 3827 }, { "epoch": 0.29509713228492135, "grad_norm": 3.8580355644226074, "learning_rate": 8.268123575013008e-06, "loss": 0.9276, "step": 3828 }, { "epoch": 0.2951742213999383, "grad_norm": 3.485034227371216, "learning_rate": 8.267178624770212e-06, "loss": 1.0299, "step": 3829 }, { "epoch": 0.2952513105149553, "grad_norm": 3.557379961013794, "learning_rate": 8.26623347083575e-06, "loss": 0.9734, "step": 3830 }, { "epoch": 0.29532839962997226, "grad_norm": 3.9309725761413574, "learning_rate": 8.265288113268548e-06, "loss": 1.0063, "step": 3831 }, { "epoch": 0.29540548874498923, "grad_norm": 3.5980284214019775, "learning_rate": 8.264342552127542e-06, "loss": 0.8885, "step": 3832 }, { "epoch": 0.29548257786000615, "grad_norm": 3.4705045223236084, "learning_rate": 8.263396787471685e-06, "loss": 0.9333, "step": 3833 }, { "epoch": 0.2955596669750231, "grad_norm": 3.6276614665985107, "learning_rate": 8.26245081935994e-06, "loss": 0.9775, "step": 3834 }, { "epoch": 0.2956367560900401, "grad_norm": 3.9132349491119385, "learning_rate": 8.261504647851283e-06, "loss": 0.9186, "step": 3835 }, { "epoch": 0.29571384520505706, "grad_norm": 3.6847267150878906, "learning_rate": 8.260558273004703e-06, "loss": 0.934, "step": 3836 }, { "epoch": 0.29579093432007403, "grad_norm": 3.9422383308410645, "learning_rate": 8.259611694879202e-06, "loss": 0.9443, "step": 3837 }, { "epoch": 0.29586802343509094, "grad_norm": 3.9089691638946533, "learning_rate": 8.258664913533791e-06, "loss": 1.0795, "step": 3838 }, { "epoch": 0.2959451125501079, "grad_norm": 3.8518786430358887, "learning_rate": 8.257717929027504e-06, "loss": 1.1894, "step": 3839 }, { "epoch": 0.2960222016651249, "grad_norm": 3.669287919998169, "learning_rate": 8.256770741419374e-06, "loss": 0.9979, "step": 3840 }, { "epoch": 0.29609929078014185, "grad_norm": 4.07830286026001, "learning_rate": 8.255823350768455e-06, "loss": 1.0086, "step": 3841 }, { "epoch": 0.2961763798951588, "grad_norm": 3.9762747287750244, "learning_rate": 8.254875757133813e-06, "loss": 1.0408, "step": 3842 }, { "epoch": 0.29625346901017574, "grad_norm": 3.584479331970215, "learning_rate": 8.253927960574525e-06, "loss": 1.0119, "step": 3843 }, { "epoch": 0.2963305581251927, "grad_norm": 3.4980578422546387, "learning_rate": 8.252979961149683e-06, "loss": 0.997, "step": 3844 }, { "epoch": 0.2964076472402097, "grad_norm": 3.391723155975342, "learning_rate": 8.252031758918386e-06, "loss": 0.9273, "step": 3845 }, { "epoch": 0.29648473635522665, "grad_norm": 3.5890274047851562, "learning_rate": 8.251083353939752e-06, "loss": 1.0653, "step": 3846 }, { "epoch": 
0.2965618254702436, "grad_norm": 3.8049674034118652, "learning_rate": 8.250134746272909e-06, "loss": 1.0471, "step": 3847 }, { "epoch": 0.29663891458526054, "grad_norm": 3.6828417778015137, "learning_rate": 8.249185935976998e-06, "loss": 1.0122, "step": 3848 }, { "epoch": 0.2967160037002775, "grad_norm": 3.8039426803588867, "learning_rate": 8.24823692311117e-06, "loss": 1.0566, "step": 3849 }, { "epoch": 0.2967930928152945, "grad_norm": 4.135082721710205, "learning_rate": 8.247287707734594e-06, "loss": 1.0346, "step": 3850 }, { "epoch": 0.29687018193031145, "grad_norm": 3.7773916721343994, "learning_rate": 8.246338289906447e-06, "loss": 0.9774, "step": 3851 }, { "epoch": 0.2969472710453284, "grad_norm": 3.629441499710083, "learning_rate": 8.245388669685922e-06, "loss": 1.0698, "step": 3852 }, { "epoch": 0.29702436016034534, "grad_norm": 3.7880666255950928, "learning_rate": 8.24443884713222e-06, "loss": 1.0878, "step": 3853 }, { "epoch": 0.2971014492753623, "grad_norm": 3.7315597534179688, "learning_rate": 8.243488822304561e-06, "loss": 1.0335, "step": 3854 }, { "epoch": 0.2971785383903793, "grad_norm": 4.079202651977539, "learning_rate": 8.24253859526217e-06, "loss": 1.0879, "step": 3855 }, { "epoch": 0.29725562750539625, "grad_norm": 4.179590702056885, "learning_rate": 8.241588166064294e-06, "loss": 1.0974, "step": 3856 }, { "epoch": 0.2973327166204132, "grad_norm": 3.6889772415161133, "learning_rate": 8.240637534770182e-06, "loss": 1.0824, "step": 3857 }, { "epoch": 0.29740980573543013, "grad_norm": 3.830030679702759, "learning_rate": 8.239686701439105e-06, "loss": 1.0321, "step": 3858 }, { "epoch": 0.2974868948504471, "grad_norm": 4.01475191116333, "learning_rate": 8.23873566613034e-06, "loss": 1.0373, "step": 3859 }, { "epoch": 0.2975639839654641, "grad_norm": 3.6347076892852783, "learning_rate": 8.237784428903182e-06, "loss": 0.9952, "step": 3860 }, { "epoch": 0.29764107308048104, "grad_norm": 3.510388135910034, "learning_rate": 8.236832989816932e-06, "loss": 0.9956, "step": 3861 }, { "epoch": 0.297718162195498, "grad_norm": 3.1566152572631836, "learning_rate": 8.23588134893091e-06, "loss": 0.9462, "step": 3862 }, { "epoch": 0.29779525131051493, "grad_norm": 4.099442481994629, "learning_rate": 8.234929506304443e-06, "loss": 0.8942, "step": 3863 }, { "epoch": 0.2978723404255319, "grad_norm": 3.81709885597229, "learning_rate": 8.233977461996879e-06, "loss": 0.9744, "step": 3864 }, { "epoch": 0.29794942954054887, "grad_norm": 3.949967861175537, "learning_rate": 8.233025216067567e-06, "loss": 1.0687, "step": 3865 }, { "epoch": 0.29802651865556584, "grad_norm": 3.714970111846924, "learning_rate": 8.232072768575875e-06, "loss": 1.1127, "step": 3866 }, { "epoch": 0.2981036077705828, "grad_norm": 3.600409984588623, "learning_rate": 8.231120119581189e-06, "loss": 0.9565, "step": 3867 }, { "epoch": 0.2981806968855997, "grad_norm": 3.3856570720672607, "learning_rate": 8.230167269142897e-06, "loss": 0.9503, "step": 3868 }, { "epoch": 0.2982577860006167, "grad_norm": 4.353588581085205, "learning_rate": 8.229214217320405e-06, "loss": 1.0665, "step": 3869 }, { "epoch": 0.29833487511563367, "grad_norm": 3.9014816284179688, "learning_rate": 8.22826096417313e-06, "loss": 1.003, "step": 3870 }, { "epoch": 0.29841196423065064, "grad_norm": 4.135391712188721, "learning_rate": 8.227307509760505e-06, "loss": 1.0497, "step": 3871 }, { "epoch": 0.2984890533456676, "grad_norm": 3.6809937953948975, "learning_rate": 8.22635385414197e-06, "loss": 1.09, "step": 3872 }, { "epoch": 0.2985661424606845, "grad_norm": 
3.6578569412231445, "learning_rate": 8.225399997376984e-06, "loss": 1.0836, "step": 3873 }, { "epoch": 0.2986432315757015, "grad_norm": 3.6818604469299316, "learning_rate": 8.22444593952501e-06, "loss": 1.0283, "step": 3874 }, { "epoch": 0.29872032069071847, "grad_norm": 3.6182854175567627, "learning_rate": 8.223491680645533e-06, "loss": 0.9956, "step": 3875 }, { "epoch": 0.29879740980573544, "grad_norm": 3.5641016960144043, "learning_rate": 8.222537220798046e-06, "loss": 1.107, "step": 3876 }, { "epoch": 0.2988744989207524, "grad_norm": 3.3794901371002197, "learning_rate": 8.22158256004205e-06, "loss": 0.9449, "step": 3877 }, { "epoch": 0.2989515880357693, "grad_norm": 3.4423718452453613, "learning_rate": 8.220627698437069e-06, "loss": 1.0393, "step": 3878 }, { "epoch": 0.2990286771507863, "grad_norm": 4.050296306610107, "learning_rate": 8.21967263604263e-06, "loss": 1.0819, "step": 3879 }, { "epoch": 0.29910576626580326, "grad_norm": 3.7262563705444336, "learning_rate": 8.218717372918277e-06, "loss": 1.0081, "step": 3880 }, { "epoch": 0.29918285538082023, "grad_norm": 3.7771074771881104, "learning_rate": 8.217761909123567e-06, "loss": 0.932, "step": 3881 }, { "epoch": 0.2992599444958372, "grad_norm": 3.4512460231781006, "learning_rate": 8.216806244718068e-06, "loss": 0.9152, "step": 3882 }, { "epoch": 0.2993370336108541, "grad_norm": 3.5177505016326904, "learning_rate": 8.215850379761357e-06, "loss": 0.9113, "step": 3883 }, { "epoch": 0.2994141227258711, "grad_norm": 3.6450557708740234, "learning_rate": 8.214894314313034e-06, "loss": 0.964, "step": 3884 }, { "epoch": 0.29949121184088806, "grad_norm": 3.773829698562622, "learning_rate": 8.213938048432697e-06, "loss": 1.1156, "step": 3885 }, { "epoch": 0.29956830095590503, "grad_norm": 3.8969192504882812, "learning_rate": 8.21298158217997e-06, "loss": 1.045, "step": 3886 }, { "epoch": 0.299645390070922, "grad_norm": 3.853782892227173, "learning_rate": 8.212024915614482e-06, "loss": 1.0426, "step": 3887 }, { "epoch": 0.299722479185939, "grad_norm": 3.673476219177246, "learning_rate": 8.211068048795877e-06, "loss": 1.0286, "step": 3888 }, { "epoch": 0.2997995683009559, "grad_norm": 3.4116404056549072, "learning_rate": 8.210110981783807e-06, "loss": 0.8559, "step": 3889 }, { "epoch": 0.29987665741597286, "grad_norm": 3.4718472957611084, "learning_rate": 8.209153714637943e-06, "loss": 0.9776, "step": 3890 }, { "epoch": 0.29995374653098983, "grad_norm": 3.6767029762268066, "learning_rate": 8.208196247417968e-06, "loss": 1.1066, "step": 3891 }, { "epoch": 0.3000308356460068, "grad_norm": 3.493272066116333, "learning_rate": 8.207238580183571e-06, "loss": 1.0995, "step": 3892 }, { "epoch": 0.30010792476102377, "grad_norm": 3.817072868347168, "learning_rate": 8.206280712994459e-06, "loss": 1.033, "step": 3893 }, { "epoch": 0.3001850138760407, "grad_norm": 4.376103401184082, "learning_rate": 8.205322645910352e-06, "loss": 0.8971, "step": 3894 }, { "epoch": 0.30026210299105766, "grad_norm": 3.5054662227630615, "learning_rate": 8.204364378990976e-06, "loss": 1.0437, "step": 3895 }, { "epoch": 0.3003391921060746, "grad_norm": 3.6018145084381104, "learning_rate": 8.203405912296079e-06, "loss": 0.9484, "step": 3896 }, { "epoch": 0.3004162812210916, "grad_norm": 3.9170470237731934, "learning_rate": 8.202447245885414e-06, "loss": 0.9874, "step": 3897 }, { "epoch": 0.30049337033610857, "grad_norm": 3.545153856277466, "learning_rate": 8.20148837981875e-06, "loss": 0.9289, "step": 3898 }, { "epoch": 0.3005704594511255, "grad_norm": 4.0050482749938965, 
"learning_rate": 8.200529314155865e-06, "loss": 1.1052, "step": 3899 }, { "epoch": 0.30064754856614245, "grad_norm": 3.5742580890655518, "learning_rate": 8.199570048956553e-06, "loss": 0.9474, "step": 3900 }, { "epoch": 0.3007246376811594, "grad_norm": 3.7283453941345215, "learning_rate": 8.19861058428062e-06, "loss": 0.9697, "step": 3901 }, { "epoch": 0.3008017267961764, "grad_norm": 3.4887049198150635, "learning_rate": 8.197650920187882e-06, "loss": 0.9355, "step": 3902 }, { "epoch": 0.30087881591119336, "grad_norm": 4.035374641418457, "learning_rate": 8.196691056738173e-06, "loss": 1.0705, "step": 3903 }, { "epoch": 0.3009559050262103, "grad_norm": 3.5932343006134033, "learning_rate": 8.19573099399133e-06, "loss": 1.0442, "step": 3904 }, { "epoch": 0.30103299414122725, "grad_norm": 3.732898712158203, "learning_rate": 8.19477073200721e-06, "loss": 0.9415, "step": 3905 }, { "epoch": 0.3011100832562442, "grad_norm": 3.413482666015625, "learning_rate": 8.193810270845683e-06, "loss": 0.8275, "step": 3906 }, { "epoch": 0.3011871723712612, "grad_norm": 4.12077522277832, "learning_rate": 8.192849610566627e-06, "loss": 0.9177, "step": 3907 }, { "epoch": 0.30126426148627816, "grad_norm": 3.9081289768218994, "learning_rate": 8.191888751229934e-06, "loss": 0.9281, "step": 3908 }, { "epoch": 0.3013413506012951, "grad_norm": 3.6880979537963867, "learning_rate": 8.190927692895508e-06, "loss": 1.0174, "step": 3909 }, { "epoch": 0.30141843971631205, "grad_norm": 3.402858018875122, "learning_rate": 8.189966435623266e-06, "loss": 1.0482, "step": 3910 }, { "epoch": 0.301495528831329, "grad_norm": 3.7268900871276855, "learning_rate": 8.189004979473138e-06, "loss": 1.1489, "step": 3911 }, { "epoch": 0.301572617946346, "grad_norm": 3.7350738048553467, "learning_rate": 8.188043324505067e-06, "loss": 1.0077, "step": 3912 }, { "epoch": 0.30164970706136296, "grad_norm": 3.5450897216796875, "learning_rate": 8.187081470779006e-06, "loss": 0.9558, "step": 3913 }, { "epoch": 0.3017267961763799, "grad_norm": 3.7310359477996826, "learning_rate": 8.18611941835492e-06, "loss": 0.9612, "step": 3914 }, { "epoch": 0.30180388529139685, "grad_norm": 3.8756463527679443, "learning_rate": 8.185157167292791e-06, "loss": 0.9793, "step": 3915 }, { "epoch": 0.3018809744064138, "grad_norm": 4.031418323516846, "learning_rate": 8.184194717652609e-06, "loss": 1.0657, "step": 3916 }, { "epoch": 0.3019580635214308, "grad_norm": 3.99357008934021, "learning_rate": 8.183232069494378e-06, "loss": 0.984, "step": 3917 }, { "epoch": 0.30203515263644776, "grad_norm": 3.870607852935791, "learning_rate": 8.182269222878112e-06, "loss": 0.9012, "step": 3918 }, { "epoch": 0.30211224175146467, "grad_norm": 3.4795637130737305, "learning_rate": 8.181306177863843e-06, "loss": 0.8978, "step": 3919 }, { "epoch": 0.30218933086648164, "grad_norm": 3.652648687362671, "learning_rate": 8.18034293451161e-06, "loss": 1.0422, "step": 3920 }, { "epoch": 0.3022664199814986, "grad_norm": 3.868264675140381, "learning_rate": 8.179379492881465e-06, "loss": 1.0272, "step": 3921 }, { "epoch": 0.3023435090965156, "grad_norm": 3.5765531063079834, "learning_rate": 8.178415853033477e-06, "loss": 0.9447, "step": 3922 }, { "epoch": 0.30242059821153255, "grad_norm": 4.241714000701904, "learning_rate": 8.177452015027721e-06, "loss": 0.9008, "step": 3923 }, { "epoch": 0.30249768732654947, "grad_norm": 3.668001413345337, "learning_rate": 8.176487978924288e-06, "loss": 0.9762, "step": 3924 }, { "epoch": 0.30257477644156644, "grad_norm": 3.9289772510528564, "learning_rate": 
8.175523744783281e-06, "loss": 1.0743, "step": 3925 }, { "epoch": 0.3026518655565834, "grad_norm": 3.804590940475464, "learning_rate": 8.174559312664815e-06, "loss": 0.9993, "step": 3926 }, { "epoch": 0.3027289546716004, "grad_norm": 3.694150924682617, "learning_rate": 8.173594682629018e-06, "loss": 0.9693, "step": 3927 }, { "epoch": 0.30280604378661735, "grad_norm": 3.8130977153778076, "learning_rate": 8.172629854736029e-06, "loss": 1.0525, "step": 3928 }, { "epoch": 0.30288313290163427, "grad_norm": 4.602511405944824, "learning_rate": 8.171664829046e-06, "loss": 1.1879, "step": 3929 }, { "epoch": 0.30296022201665124, "grad_norm": 3.7024033069610596, "learning_rate": 8.170699605619096e-06, "loss": 0.9487, "step": 3930 }, { "epoch": 0.3030373111316682, "grad_norm": 3.632319688796997, "learning_rate": 8.169734184515493e-06, "loss": 1.1081, "step": 3931 }, { "epoch": 0.3031144002466852, "grad_norm": 3.9944117069244385, "learning_rate": 8.168768565795377e-06, "loss": 0.9995, "step": 3932 }, { "epoch": 0.30319148936170215, "grad_norm": 3.9244701862335205, "learning_rate": 8.167802749518956e-06, "loss": 1.1025, "step": 3933 }, { "epoch": 0.30326857847671906, "grad_norm": 3.9141428470611572, "learning_rate": 8.166836735746438e-06, "loss": 1.0337, "step": 3934 }, { "epoch": 0.30334566759173603, "grad_norm": 3.821218967437744, "learning_rate": 8.165870524538052e-06, "loss": 1.0971, "step": 3935 }, { "epoch": 0.303422756706753, "grad_norm": 4.062706470489502, "learning_rate": 8.164904115954036e-06, "loss": 1.1178, "step": 3936 }, { "epoch": 0.30349984582177, "grad_norm": 3.5247349739074707, "learning_rate": 8.163937510054638e-06, "loss": 0.9598, "step": 3937 }, { "epoch": 0.30357693493678695, "grad_norm": 3.914620876312256, "learning_rate": 8.162970706900124e-06, "loss": 1.0434, "step": 3938 }, { "epoch": 0.30365402405180386, "grad_norm": 3.9191715717315674, "learning_rate": 8.162003706550767e-06, "loss": 1.163, "step": 3939 }, { "epoch": 0.30373111316682083, "grad_norm": 3.459897994995117, "learning_rate": 8.161036509066856e-06, "loss": 1.0139, "step": 3940 }, { "epoch": 0.3038082022818378, "grad_norm": 3.6575376987457275, "learning_rate": 8.16006911450869e-06, "loss": 0.9167, "step": 3941 }, { "epoch": 0.3038852913968548, "grad_norm": 3.691288948059082, "learning_rate": 8.159101522936582e-06, "loss": 0.9456, "step": 3942 }, { "epoch": 0.30396238051187174, "grad_norm": 4.538893222808838, "learning_rate": 8.158133734410853e-06, "loss": 1.0096, "step": 3943 }, { "epoch": 0.30403946962688866, "grad_norm": 3.614013671875, "learning_rate": 8.157165748991845e-06, "loss": 1.0055, "step": 3944 }, { "epoch": 0.30411655874190563, "grad_norm": 3.9665722846984863, "learning_rate": 8.156197566739901e-06, "loss": 1.1126, "step": 3945 }, { "epoch": 0.3041936478569226, "grad_norm": 3.547726631164551, "learning_rate": 8.155229187715385e-06, "loss": 1.0368, "step": 3946 }, { "epoch": 0.30427073697193957, "grad_norm": 3.3927464485168457, "learning_rate": 8.154260611978673e-06, "loss": 1.0033, "step": 3947 }, { "epoch": 0.30434782608695654, "grad_norm": 3.441749334335327, "learning_rate": 8.153291839590147e-06, "loss": 0.9378, "step": 3948 }, { "epoch": 0.30442491520197346, "grad_norm": 3.7463624477386475, "learning_rate": 8.152322870610206e-06, "loss": 1.0562, "step": 3949 }, { "epoch": 0.3045020043169904, "grad_norm": 3.6333110332489014, "learning_rate": 8.151353705099261e-06, "loss": 1.0003, "step": 3950 }, { "epoch": 0.3045790934320074, "grad_norm": 3.9135475158691406, "learning_rate": 8.150384343117733e-06, 
"loss": 1.1028, "step": 3951 }, { "epoch": 0.30465618254702437, "grad_norm": 3.670753240585327, "learning_rate": 8.149414784726058e-06, "loss": 0.9757, "step": 3952 }, { "epoch": 0.30473327166204134, "grad_norm": 3.691105365753174, "learning_rate": 8.148445029984683e-06, "loss": 1.0124, "step": 3953 }, { "epoch": 0.30481036077705825, "grad_norm": 3.77278995513916, "learning_rate": 8.147475078954067e-06, "loss": 0.9402, "step": 3954 }, { "epoch": 0.3048874498920752, "grad_norm": 3.847795009613037, "learning_rate": 8.146504931694678e-06, "loss": 1.0041, "step": 3955 }, { "epoch": 0.3049645390070922, "grad_norm": 3.791046380996704, "learning_rate": 8.145534588267006e-06, "loss": 1.023, "step": 3956 }, { "epoch": 0.30504162812210917, "grad_norm": 3.8239328861236572, "learning_rate": 8.144564048731542e-06, "loss": 0.9401, "step": 3957 }, { "epoch": 0.30511871723712614, "grad_norm": 3.5764706134796143, "learning_rate": 8.143593313148794e-06, "loss": 0.9707, "step": 3958 }, { "epoch": 0.30519580635214305, "grad_norm": 4.011223793029785, "learning_rate": 8.142622381579285e-06, "loss": 1.0103, "step": 3959 }, { "epoch": 0.30527289546716, "grad_norm": 4.034478664398193, "learning_rate": 8.14165125408355e-06, "loss": 0.8388, "step": 3960 }, { "epoch": 0.305349984582177, "grad_norm": 3.6760923862457275, "learning_rate": 8.140679930722126e-06, "loss": 1.0874, "step": 3961 }, { "epoch": 0.30542707369719396, "grad_norm": 3.658113718032837, "learning_rate": 8.139708411555575e-06, "loss": 0.9909, "step": 3962 }, { "epoch": 0.30550416281221093, "grad_norm": 4.3825364112854, "learning_rate": 8.138736696644467e-06, "loss": 1.0232, "step": 3963 }, { "epoch": 0.30558125192722785, "grad_norm": 3.9493494033813477, "learning_rate": 8.137764786049382e-06, "loss": 0.9977, "step": 3964 }, { "epoch": 0.3056583410422448, "grad_norm": 3.7453453540802, "learning_rate": 8.136792679830913e-06, "loss": 0.9792, "step": 3965 }, { "epoch": 0.3057354301572618, "grad_norm": 3.767953634262085, "learning_rate": 8.135820378049667e-06, "loss": 1.0847, "step": 3966 }, { "epoch": 0.30581251927227876, "grad_norm": 3.4496097564697266, "learning_rate": 8.13484788076626e-06, "loss": 0.9574, "step": 3967 }, { "epoch": 0.30588960838729573, "grad_norm": 3.7329258918762207, "learning_rate": 8.133875188041323e-06, "loss": 0.9645, "step": 3968 }, { "epoch": 0.3059666975023127, "grad_norm": 3.522775888442993, "learning_rate": 8.1329022999355e-06, "loss": 1.0296, "step": 3969 }, { "epoch": 0.3060437866173296, "grad_norm": 3.753998041152954, "learning_rate": 8.131929216509445e-06, "loss": 1.0741, "step": 3970 }, { "epoch": 0.3061208757323466, "grad_norm": 3.6926846504211426, "learning_rate": 8.130955937823821e-06, "loss": 0.963, "step": 3971 }, { "epoch": 0.30619796484736356, "grad_norm": 3.8235857486724854, "learning_rate": 8.129982463939313e-06, "loss": 1.0803, "step": 3972 }, { "epoch": 0.30627505396238053, "grad_norm": 3.8400890827178955, "learning_rate": 8.129008794916609e-06, "loss": 1.074, "step": 3973 }, { "epoch": 0.3063521430773975, "grad_norm": 3.7315080165863037, "learning_rate": 8.12803493081641e-06, "loss": 1.1322, "step": 3974 }, { "epoch": 0.3064292321924144, "grad_norm": 3.864604949951172, "learning_rate": 8.127060871699435e-06, "loss": 1.0681, "step": 3975 }, { "epoch": 0.3065063213074314, "grad_norm": 4.137643337249756, "learning_rate": 8.12608661762641e-06, "loss": 1.0562, "step": 3976 }, { "epoch": 0.30658341042244835, "grad_norm": 3.801588296890259, "learning_rate": 8.125112168658074e-06, "loss": 1.0874, "step": 3977 }, { 
"epoch": 0.3066604995374653, "grad_norm": 3.4541311264038086, "learning_rate": 8.12413752485518e-06, "loss": 0.9565, "step": 3978 }, { "epoch": 0.3067375886524823, "grad_norm": 3.6811790466308594, "learning_rate": 8.123162686278493e-06, "loss": 1.0322, "step": 3979 }, { "epoch": 0.3068146777674992, "grad_norm": 4.22686243057251, "learning_rate": 8.122187652988786e-06, "loss": 0.9425, "step": 3980 }, { "epoch": 0.3068917668825162, "grad_norm": 3.42470383644104, "learning_rate": 8.12121242504685e-06, "loss": 0.9987, "step": 3981 }, { "epoch": 0.30696885599753315, "grad_norm": 3.6424388885498047, "learning_rate": 8.120237002513484e-06, "loss": 0.979, "step": 3982 }, { "epoch": 0.3070459451125501, "grad_norm": 3.6647961139678955, "learning_rate": 8.119261385449502e-06, "loss": 1.0419, "step": 3983 }, { "epoch": 0.3071230342275671, "grad_norm": 3.583043098449707, "learning_rate": 8.118285573915726e-06, "loss": 0.9044, "step": 3984 }, { "epoch": 0.307200123342584, "grad_norm": 3.9360716342926025, "learning_rate": 8.117309567972995e-06, "loss": 1.0924, "step": 3985 }, { "epoch": 0.307277212457601, "grad_norm": 3.999488592147827, "learning_rate": 8.116333367682158e-06, "loss": 1.0132, "step": 3986 }, { "epoch": 0.30735430157261795, "grad_norm": 3.6380109786987305, "learning_rate": 8.115356973104076e-06, "loss": 1.0445, "step": 3987 }, { "epoch": 0.3074313906876349, "grad_norm": 3.977320909500122, "learning_rate": 8.11438038429962e-06, "loss": 1.0968, "step": 3988 }, { "epoch": 0.3075084798026519, "grad_norm": 3.90857195854187, "learning_rate": 8.113403601329678e-06, "loss": 1.1308, "step": 3989 }, { "epoch": 0.3075855689176688, "grad_norm": 3.865142345428467, "learning_rate": 8.112426624255145e-06, "loss": 0.9358, "step": 3990 }, { "epoch": 0.3076626580326858, "grad_norm": 3.489595890045166, "learning_rate": 8.111449453136932e-06, "loss": 0.9765, "step": 3991 }, { "epoch": 0.30773974714770275, "grad_norm": 4.063758850097656, "learning_rate": 8.110472088035961e-06, "loss": 0.9821, "step": 3992 }, { "epoch": 0.3078168362627197, "grad_norm": 3.65039324760437, "learning_rate": 8.109494529013165e-06, "loss": 1.0804, "step": 3993 }, { "epoch": 0.3078939253777367, "grad_norm": 3.800696611404419, "learning_rate": 8.108516776129489e-06, "loss": 0.8819, "step": 3994 }, { "epoch": 0.3079710144927536, "grad_norm": 3.8089771270751953, "learning_rate": 8.107538829445891e-06, "loss": 1.0294, "step": 3995 }, { "epoch": 0.3080481036077706, "grad_norm": 3.4781205654144287, "learning_rate": 8.106560689023342e-06, "loss": 0.9528, "step": 3996 }, { "epoch": 0.30812519272278754, "grad_norm": 3.5084052085876465, "learning_rate": 8.105582354922822e-06, "loss": 0.9852, "step": 3997 }, { "epoch": 0.3082022818378045, "grad_norm": 3.660351037979126, "learning_rate": 8.104603827205329e-06, "loss": 1.032, "step": 3998 }, { "epoch": 0.3082793709528215, "grad_norm": 3.5761704444885254, "learning_rate": 8.103625105931865e-06, "loss": 0.9191, "step": 3999 }, { "epoch": 0.3083564600678384, "grad_norm": 3.8501265048980713, "learning_rate": 8.10264619116345e-06, "loss": 1.0343, "step": 4000 }, { "epoch": 0.30843354918285537, "grad_norm": 3.4951345920562744, "learning_rate": 8.101667082961114e-06, "loss": 0.9921, "step": 4001 }, { "epoch": 0.30851063829787234, "grad_norm": 3.844111204147339, "learning_rate": 8.1006877813859e-06, "loss": 0.979, "step": 4002 }, { "epoch": 0.3085877274128893, "grad_norm": 3.7992372512817383, "learning_rate": 8.09970828649886e-06, "loss": 1.0304, "step": 4003 }, { "epoch": 0.3086648165279063, "grad_norm": 
4.003841876983643, "learning_rate": 8.098728598361063e-06, "loss": 1.0203, "step": 4004 }, { "epoch": 0.3087419056429232, "grad_norm": 3.7176103591918945, "learning_rate": 8.097748717033587e-06, "loss": 0.9943, "step": 4005 }, { "epoch": 0.30881899475794017, "grad_norm": 3.827420234680176, "learning_rate": 8.096768642577521e-06, "loss": 0.9806, "step": 4006 }, { "epoch": 0.30889608387295714, "grad_norm": 3.841545820236206, "learning_rate": 8.09578837505397e-06, "loss": 1.0092, "step": 4007 }, { "epoch": 0.3089731729879741, "grad_norm": 3.845114231109619, "learning_rate": 8.094807914524048e-06, "loss": 1.0792, "step": 4008 }, { "epoch": 0.3090502621029911, "grad_norm": 3.8567841053009033, "learning_rate": 8.093827261048879e-06, "loss": 1.0728, "step": 4009 }, { "epoch": 0.309127351218008, "grad_norm": 3.801530361175537, "learning_rate": 8.092846414689605e-06, "loss": 0.9051, "step": 4010 }, { "epoch": 0.30920444033302497, "grad_norm": 3.59698748588562, "learning_rate": 8.091865375507375e-06, "loss": 1.0773, "step": 4011 }, { "epoch": 0.30928152944804194, "grad_norm": 3.8643345832824707, "learning_rate": 8.090884143563352e-06, "loss": 0.9275, "step": 4012 }, { "epoch": 0.3093586185630589, "grad_norm": 3.8212454319000244, "learning_rate": 8.08990271891871e-06, "loss": 1.0513, "step": 4013 }, { "epoch": 0.3094357076780759, "grad_norm": 3.5950045585632324, "learning_rate": 8.088921101634637e-06, "loss": 1.0344, "step": 4014 }, { "epoch": 0.3095127967930928, "grad_norm": 3.403311252593994, "learning_rate": 8.087939291772331e-06, "loss": 0.9926, "step": 4015 }, { "epoch": 0.30958988590810976, "grad_norm": 4.000549793243408, "learning_rate": 8.086957289393002e-06, "loss": 1.0657, "step": 4016 }, { "epoch": 0.30966697502312673, "grad_norm": 3.72603702545166, "learning_rate": 8.085975094557876e-06, "loss": 1.061, "step": 4017 }, { "epoch": 0.3097440641381437, "grad_norm": 3.4024152755737305, "learning_rate": 8.084992707328184e-06, "loss": 0.9515, "step": 4018 }, { "epoch": 0.3098211532531607, "grad_norm": 3.527376174926758, "learning_rate": 8.084010127765174e-06, "loss": 0.9454, "step": 4019 }, { "epoch": 0.3098982423681776, "grad_norm": 3.594805955886841, "learning_rate": 8.083027355930106e-06, "loss": 0.9794, "step": 4020 }, { "epoch": 0.30997533148319456, "grad_norm": 3.747175693511963, "learning_rate": 8.08204439188425e-06, "loss": 0.9221, "step": 4021 }, { "epoch": 0.31005242059821153, "grad_norm": 3.6072657108306885, "learning_rate": 8.081061235688889e-06, "loss": 1.0778, "step": 4022 }, { "epoch": 0.3101295097132285, "grad_norm": 3.919334888458252, "learning_rate": 8.080077887405315e-06, "loss": 0.9001, "step": 4023 }, { "epoch": 0.3102065988282455, "grad_norm": 4.0752973556518555, "learning_rate": 8.079094347094839e-06, "loss": 1.0525, "step": 4024 }, { "epoch": 0.3102836879432624, "grad_norm": 3.7410547733306885, "learning_rate": 8.078110614818777e-06, "loss": 0.9558, "step": 4025 }, { "epoch": 0.31036077705827936, "grad_norm": 3.7749099731445312, "learning_rate": 8.07712669063846e-06, "loss": 0.9536, "step": 4026 }, { "epoch": 0.31043786617329633, "grad_norm": 3.8618111610412598, "learning_rate": 8.07614257461523e-06, "loss": 0.9301, "step": 4027 }, { "epoch": 0.3105149552883133, "grad_norm": 3.772061586380005, "learning_rate": 8.075158266810442e-06, "loss": 1.0186, "step": 4028 }, { "epoch": 0.31059204440333027, "grad_norm": 4.043780326843262, "learning_rate": 8.074173767285465e-06, "loss": 1.0044, "step": 4029 }, { "epoch": 0.3106691335183472, "grad_norm": 4.081550598144531, 
"learning_rate": 8.073189076101673e-06, "loss": 1.1462, "step": 4030 }, { "epoch": 0.31074622263336416, "grad_norm": 3.861663818359375, "learning_rate": 8.072204193320459e-06, "loss": 1.1358, "step": 4031 }, { "epoch": 0.3108233117483811, "grad_norm": 3.9835574626922607, "learning_rate": 8.071219119003223e-06, "loss": 1.1444, "step": 4032 }, { "epoch": 0.3109004008633981, "grad_norm": 3.3269333839416504, "learning_rate": 8.070233853211385e-06, "loss": 0.8577, "step": 4033 }, { "epoch": 0.31097748997841507, "grad_norm": 4.0250091552734375, "learning_rate": 8.069248396006365e-06, "loss": 0.9865, "step": 4034 }, { "epoch": 0.311054579093432, "grad_norm": 3.576920986175537, "learning_rate": 8.068262747449604e-06, "loss": 1.0196, "step": 4035 }, { "epoch": 0.31113166820844895, "grad_norm": 3.694272994995117, "learning_rate": 8.067276907602551e-06, "loss": 1.0175, "step": 4036 }, { "epoch": 0.3112087573234659, "grad_norm": 3.5465962886810303, "learning_rate": 8.06629087652667e-06, "loss": 1.0006, "step": 4037 }, { "epoch": 0.3112858464384829, "grad_norm": 3.7290923595428467, "learning_rate": 8.065304654283434e-06, "loss": 0.9712, "step": 4038 }, { "epoch": 0.31136293555349986, "grad_norm": 3.9318015575408936, "learning_rate": 8.064318240934327e-06, "loss": 0.9798, "step": 4039 }, { "epoch": 0.3114400246685168, "grad_norm": 3.745164394378662, "learning_rate": 8.063331636540848e-06, "loss": 0.9533, "step": 4040 }, { "epoch": 0.31151711378353375, "grad_norm": 4.017617225646973, "learning_rate": 8.062344841164508e-06, "loss": 1.0166, "step": 4041 }, { "epoch": 0.3115942028985507, "grad_norm": 3.4828991889953613, "learning_rate": 8.061357854866827e-06, "loss": 0.9534, "step": 4042 }, { "epoch": 0.3116712920135677, "grad_norm": 3.8771891593933105, "learning_rate": 8.060370677709338e-06, "loss": 0.9229, "step": 4043 }, { "epoch": 0.31174838112858466, "grad_norm": 4.284106731414795, "learning_rate": 8.059383309753587e-06, "loss": 0.8905, "step": 4044 }, { "epoch": 0.3118254702436016, "grad_norm": 3.635425567626953, "learning_rate": 8.058395751061135e-06, "loss": 1.0268, "step": 4045 }, { "epoch": 0.31190255935861855, "grad_norm": 3.7544236183166504, "learning_rate": 8.057408001693544e-06, "loss": 0.9735, "step": 4046 }, { "epoch": 0.3119796484736355, "grad_norm": 3.6741573810577393, "learning_rate": 8.0564200617124e-06, "loss": 1.0407, "step": 4047 }, { "epoch": 0.3120567375886525, "grad_norm": 3.6029105186462402, "learning_rate": 8.055431931179296e-06, "loss": 0.9976, "step": 4048 }, { "epoch": 0.31213382670366946, "grad_norm": 3.3681766986846924, "learning_rate": 8.054443610155836e-06, "loss": 0.9445, "step": 4049 }, { "epoch": 0.3122109158186864, "grad_norm": 3.6307098865509033, "learning_rate": 8.053455098703635e-06, "loss": 0.9859, "step": 4050 }, { "epoch": 0.31228800493370334, "grad_norm": 3.4906904697418213, "learning_rate": 8.052466396884323e-06, "loss": 1.0344, "step": 4051 }, { "epoch": 0.3123650940487203, "grad_norm": 3.574552297592163, "learning_rate": 8.05147750475954e-06, "loss": 0.9103, "step": 4052 }, { "epoch": 0.3124421831637373, "grad_norm": 4.1314377784729, "learning_rate": 8.050488422390939e-06, "loss": 1.0707, "step": 4053 }, { "epoch": 0.31251927227875426, "grad_norm": 4.234683990478516, "learning_rate": 8.049499149840183e-06, "loss": 1.0068, "step": 4054 }, { "epoch": 0.3125963613937712, "grad_norm": 3.7397048473358154, "learning_rate": 8.048509687168949e-06, "loss": 0.9691, "step": 4055 }, { "epoch": 0.31267345050878814, "grad_norm": 3.67864727973938, "learning_rate": 
8.047520034438925e-06, "loss": 0.9445, "step": 4056 }, { "epoch": 0.3127505396238051, "grad_norm": 4.083065032958984, "learning_rate": 8.046530191711808e-06, "loss": 1.1873, "step": 4057 }, { "epoch": 0.3128276287388221, "grad_norm": 3.7557568550109863, "learning_rate": 8.045540159049315e-06, "loss": 1.0247, "step": 4058 }, { "epoch": 0.31290471785383905, "grad_norm": 3.5163233280181885, "learning_rate": 8.044549936513165e-06, "loss": 1.039, "step": 4059 }, { "epoch": 0.312981806968856, "grad_norm": 7.071890354156494, "learning_rate": 8.043559524165096e-06, "loss": 1.0503, "step": 4060 }, { "epoch": 0.31305889608387294, "grad_norm": 3.8393161296844482, "learning_rate": 8.042568922066852e-06, "loss": 1.083, "step": 4061 }, { "epoch": 0.3131359851988899, "grad_norm": 4.509868144989014, "learning_rate": 8.041578130280194e-06, "loss": 1.0581, "step": 4062 }, { "epoch": 0.3132130743139069, "grad_norm": 4.245510578155518, "learning_rate": 8.040587148866893e-06, "loss": 0.9752, "step": 4063 }, { "epoch": 0.31329016342892385, "grad_norm": 4.26503849029541, "learning_rate": 8.03959597788873e-06, "loss": 0.9107, "step": 4064 }, { "epoch": 0.3133672525439408, "grad_norm": 3.7438037395477295, "learning_rate": 8.038604617407501e-06, "loss": 0.9565, "step": 4065 }, { "epoch": 0.31344434165895774, "grad_norm": 5.089138507843018, "learning_rate": 8.037613067485012e-06, "loss": 1.0168, "step": 4066 }, { "epoch": 0.3135214307739747, "grad_norm": 4.153073310852051, "learning_rate": 8.036621328183079e-06, "loss": 1.0259, "step": 4067 }, { "epoch": 0.3135985198889917, "grad_norm": 3.4369683265686035, "learning_rate": 8.035629399563533e-06, "loss": 0.9818, "step": 4068 }, { "epoch": 0.31367560900400865, "grad_norm": 3.7807652950286865, "learning_rate": 8.034637281688219e-06, "loss": 1.0017, "step": 4069 }, { "epoch": 0.3137526981190256, "grad_norm": 3.68034291267395, "learning_rate": 8.033644974618983e-06, "loss": 0.9743, "step": 4070 }, { "epoch": 0.31382978723404253, "grad_norm": 3.763885021209717, "learning_rate": 8.032652478417697e-06, "loss": 0.9948, "step": 4071 }, { "epoch": 0.3139068763490595, "grad_norm": 3.9806506633758545, "learning_rate": 8.031659793146237e-06, "loss": 1.1496, "step": 4072 }, { "epoch": 0.3139839654640765, "grad_norm": 3.6926496028900146, "learning_rate": 8.030666918866487e-06, "loss": 1.0143, "step": 4073 }, { "epoch": 0.31406105457909345, "grad_norm": 3.7122464179992676, "learning_rate": 8.029673855640352e-06, "loss": 1.0272, "step": 4074 }, { "epoch": 0.3141381436941104, "grad_norm": 3.5680923461914062, "learning_rate": 8.028680603529742e-06, "loss": 0.8766, "step": 4075 }, { "epoch": 0.31421523280912733, "grad_norm": 3.5684890747070312, "learning_rate": 8.027687162596584e-06, "loss": 0.9874, "step": 4076 }, { "epoch": 0.3142923219241443, "grad_norm": 3.833163022994995, "learning_rate": 8.026693532902811e-06, "loss": 1.0566, "step": 4077 }, { "epoch": 0.3143694110391613, "grad_norm": 4.123239040374756, "learning_rate": 8.025699714510374e-06, "loss": 1.093, "step": 4078 }, { "epoch": 0.31444650015417824, "grad_norm": 3.9509053230285645, "learning_rate": 8.024705707481228e-06, "loss": 0.9761, "step": 4079 }, { "epoch": 0.3145235892691952, "grad_norm": 3.3474016189575195, "learning_rate": 8.023711511877347e-06, "loss": 0.9236, "step": 4080 }, { "epoch": 0.31460067838421213, "grad_norm": 4.015679359436035, "learning_rate": 8.022717127760715e-06, "loss": 1.0176, "step": 4081 }, { "epoch": 0.3146777674992291, "grad_norm": 3.6416444778442383, "learning_rate": 8.021722555193324e-06, 
"loss": 1.0104, "step": 4082 }, { "epoch": 0.31475485661424607, "grad_norm": 3.4460465908050537, "learning_rate": 8.020727794237182e-06, "loss": 0.8351, "step": 4083 }, { "epoch": 0.31483194572926304, "grad_norm": 3.7392327785491943, "learning_rate": 8.019732844954306e-06, "loss": 1.059, "step": 4084 }, { "epoch": 0.31490903484428, "grad_norm": 3.5918447971343994, "learning_rate": 8.018737707406728e-06, "loss": 1.0905, "step": 4085 }, { "epoch": 0.3149861239592969, "grad_norm": 3.957096576690674, "learning_rate": 8.017742381656486e-06, "loss": 0.9457, "step": 4086 }, { "epoch": 0.3150632130743139, "grad_norm": 3.9353511333465576, "learning_rate": 8.016746867765639e-06, "loss": 0.9994, "step": 4087 }, { "epoch": 0.31514030218933087, "grad_norm": 3.582047700881958, "learning_rate": 8.015751165796247e-06, "loss": 1.0187, "step": 4088 }, { "epoch": 0.31521739130434784, "grad_norm": 3.996525287628174, "learning_rate": 8.014755275810389e-06, "loss": 1.0468, "step": 4089 }, { "epoch": 0.3152944804193648, "grad_norm": 3.70849871635437, "learning_rate": 8.013759197870153e-06, "loss": 1.1356, "step": 4090 }, { "epoch": 0.3153715695343817, "grad_norm": 3.3762667179107666, "learning_rate": 8.012762932037638e-06, "loss": 0.873, "step": 4091 }, { "epoch": 0.3154486586493987, "grad_norm": 3.614732503890991, "learning_rate": 8.011766478374961e-06, "loss": 0.9919, "step": 4092 }, { "epoch": 0.31552574776441566, "grad_norm": 3.7229347229003906, "learning_rate": 8.010769836944241e-06, "loss": 0.9781, "step": 4093 }, { "epoch": 0.31560283687943264, "grad_norm": 3.661196231842041, "learning_rate": 8.009773007807615e-06, "loss": 1.0271, "step": 4094 }, { "epoch": 0.3156799259944496, "grad_norm": 3.394929885864258, "learning_rate": 8.00877599102723e-06, "loss": 1.0682, "step": 4095 }, { "epoch": 0.3157570151094665, "grad_norm": 3.5974974632263184, "learning_rate": 8.007778786665246e-06, "loss": 0.9009, "step": 4096 }, { "epoch": 0.3158341042244835, "grad_norm": 3.423668622970581, "learning_rate": 8.006781394783831e-06, "loss": 0.9942, "step": 4097 }, { "epoch": 0.31591119333950046, "grad_norm": 3.99906849861145, "learning_rate": 8.005783815445168e-06, "loss": 1.1061, "step": 4098 }, { "epoch": 0.31598828245451743, "grad_norm": 3.8605785369873047, "learning_rate": 8.004786048711452e-06, "loss": 1.059, "step": 4099 }, { "epoch": 0.3160653715695344, "grad_norm": 3.41093373298645, "learning_rate": 8.003788094644888e-06, "loss": 0.9386, "step": 4100 }, { "epoch": 0.3161424606845513, "grad_norm": 4.836550235748291, "learning_rate": 8.002789953307692e-06, "loss": 0.9721, "step": 4101 }, { "epoch": 0.3162195497995683, "grad_norm": 3.4783308506011963, "learning_rate": 8.001791624762097e-06, "loss": 0.8727, "step": 4102 }, { "epoch": 0.31629663891458526, "grad_norm": 3.9497110843658447, "learning_rate": 8.00079310907034e-06, "loss": 1.0713, "step": 4103 }, { "epoch": 0.31637372802960223, "grad_norm": 3.943293809890747, "learning_rate": 7.999794406294674e-06, "loss": 0.9778, "step": 4104 }, { "epoch": 0.3164508171446192, "grad_norm": 4.106851100921631, "learning_rate": 7.998795516497362e-06, "loss": 1.0506, "step": 4105 }, { "epoch": 0.3165279062596361, "grad_norm": 4.133145332336426, "learning_rate": 7.997796439740682e-06, "loss": 1.1453, "step": 4106 }, { "epoch": 0.3166049953746531, "grad_norm": 3.896491289138794, "learning_rate": 7.99679717608692e-06, "loss": 1.0087, "step": 4107 }, { "epoch": 0.31668208448967006, "grad_norm": 3.7162492275238037, "learning_rate": 7.995797725598373e-06, "loss": 1.0709, "step": 4108 }, { 
"epoch": 0.316759173604687, "grad_norm": 4.147136688232422, "learning_rate": 7.994798088337357e-06, "loss": 1.0759, "step": 4109 }, { "epoch": 0.316836262719704, "grad_norm": 3.5119404792785645, "learning_rate": 7.993798264366189e-06, "loss": 0.9334, "step": 4110 }, { "epoch": 0.3169133518347209, "grad_norm": 3.72426438331604, "learning_rate": 7.992798253747202e-06, "loss": 1.002, "step": 4111 }, { "epoch": 0.3169904409497379, "grad_norm": 3.564457654953003, "learning_rate": 7.991798056542747e-06, "loss": 1.0832, "step": 4112 }, { "epoch": 0.31706753006475485, "grad_norm": 3.688892126083374, "learning_rate": 7.990797672815177e-06, "loss": 1.0163, "step": 4113 }, { "epoch": 0.3171446191797718, "grad_norm": 3.9110841751098633, "learning_rate": 7.989797102626862e-06, "loss": 1.015, "step": 4114 }, { "epoch": 0.3172217082947888, "grad_norm": 3.6422502994537354, "learning_rate": 7.988796346040182e-06, "loss": 1.0146, "step": 4115 }, { "epoch": 0.3172987974098057, "grad_norm": 3.7644057273864746, "learning_rate": 7.987795403117528e-06, "loss": 1.0263, "step": 4116 }, { "epoch": 0.3173758865248227, "grad_norm": 3.885549306869507, "learning_rate": 7.986794273921309e-06, "loss": 1.0733, "step": 4117 }, { "epoch": 0.31745297563983965, "grad_norm": 3.661712169647217, "learning_rate": 7.985792958513932e-06, "loss": 1.0752, "step": 4118 }, { "epoch": 0.3175300647548566, "grad_norm": 3.5156235694885254, "learning_rate": 7.98479145695783e-06, "loss": 0.9757, "step": 4119 }, { "epoch": 0.3176071538698736, "grad_norm": 3.432779312133789, "learning_rate": 7.983789769315438e-06, "loss": 1.0079, "step": 4120 }, { "epoch": 0.3176842429848905, "grad_norm": 3.5986807346343994, "learning_rate": 7.982787895649207e-06, "loss": 1.0681, "step": 4121 }, { "epoch": 0.3177613320999075, "grad_norm": 4.053248405456543, "learning_rate": 7.981785836021601e-06, "loss": 0.9929, "step": 4122 }, { "epoch": 0.31783842121492445, "grad_norm": 4.073676109313965, "learning_rate": 7.980783590495089e-06, "loss": 0.9481, "step": 4123 }, { "epoch": 0.3179155103299414, "grad_norm": 4.069306373596191, "learning_rate": 7.979781159132157e-06, "loss": 1.1544, "step": 4124 }, { "epoch": 0.3179925994449584, "grad_norm": 3.5793960094451904, "learning_rate": 7.978778541995304e-06, "loss": 1.0315, "step": 4125 }, { "epoch": 0.3180696885599753, "grad_norm": 3.4428999423980713, "learning_rate": 7.977775739147038e-06, "loss": 0.9997, "step": 4126 }, { "epoch": 0.3181467776749923, "grad_norm": 3.8016507625579834, "learning_rate": 7.976772750649874e-06, "loss": 0.9788, "step": 4127 }, { "epoch": 0.31822386679000925, "grad_norm": 3.3368539810180664, "learning_rate": 7.975769576566348e-06, "loss": 0.9229, "step": 4128 }, { "epoch": 0.3183009559050262, "grad_norm": 3.78816556930542, "learning_rate": 7.974766216959001e-06, "loss": 1.0265, "step": 4129 }, { "epoch": 0.3183780450200432, "grad_norm": 3.6755471229553223, "learning_rate": 7.973762671890387e-06, "loss": 1.0043, "step": 4130 }, { "epoch": 0.3184551341350601, "grad_norm": 3.3952746391296387, "learning_rate": 7.972758941423071e-06, "loss": 0.895, "step": 4131 }, { "epoch": 0.3185322232500771, "grad_norm": 3.5413053035736084, "learning_rate": 7.971755025619632e-06, "loss": 0.9635, "step": 4132 }, { "epoch": 0.31860931236509404, "grad_norm": 3.619885206222534, "learning_rate": 7.970750924542659e-06, "loss": 1.0574, "step": 4133 }, { "epoch": 0.318686401480111, "grad_norm": 3.1986031532287598, "learning_rate": 7.96974663825475e-06, "loss": 0.9205, "step": 4134 }, { "epoch": 0.318763490595128, 
"grad_norm": 3.69963002204895, "learning_rate": 7.968742166818521e-06, "loss": 1.0614, "step": 4135 }, { "epoch": 0.3188405797101449, "grad_norm": 3.788604736328125, "learning_rate": 7.967737510296591e-06, "loss": 1.0176, "step": 4136 }, { "epoch": 0.31891766882516187, "grad_norm": 3.8696343898773193, "learning_rate": 7.9667326687516e-06, "loss": 1.0235, "step": 4137 }, { "epoch": 0.31899475794017884, "grad_norm": 3.4079203605651855, "learning_rate": 7.965727642246191e-06, "loss": 0.9388, "step": 4138 }, { "epoch": 0.3190718470551958, "grad_norm": 4.057232856750488, "learning_rate": 7.964722430843021e-06, "loss": 1.059, "step": 4139 }, { "epoch": 0.3191489361702128, "grad_norm": 3.7363650798797607, "learning_rate": 7.963717034604765e-06, "loss": 1.0545, "step": 4140 }, { "epoch": 0.31922602528522975, "grad_norm": 3.5568385124206543, "learning_rate": 7.962711453594101e-06, "loss": 0.9876, "step": 4141 }, { "epoch": 0.31930311440024667, "grad_norm": 3.632174491882324, "learning_rate": 7.961705687873722e-06, "loss": 0.9774, "step": 4142 }, { "epoch": 0.31938020351526364, "grad_norm": 4.495326042175293, "learning_rate": 7.960699737506333e-06, "loss": 1.1305, "step": 4143 }, { "epoch": 0.3194572926302806, "grad_norm": 3.538480281829834, "learning_rate": 7.959693602554648e-06, "loss": 1.0082, "step": 4144 }, { "epoch": 0.3195343817452976, "grad_norm": 3.430492877960205, "learning_rate": 7.958687283081394e-06, "loss": 1.1023, "step": 4145 }, { "epoch": 0.31961147086031455, "grad_norm": 3.7447872161865234, "learning_rate": 7.957680779149315e-06, "loss": 1.0244, "step": 4146 }, { "epoch": 0.31968855997533147, "grad_norm": 3.6742663383483887, "learning_rate": 7.956674090821156e-06, "loss": 0.9341, "step": 4147 }, { "epoch": 0.31976564909034844, "grad_norm": 4.139322280883789, "learning_rate": 7.955667218159679e-06, "loss": 0.9498, "step": 4148 }, { "epoch": 0.3198427382053654, "grad_norm": 3.798419952392578, "learning_rate": 7.95466016122766e-06, "loss": 1.0443, "step": 4149 }, { "epoch": 0.3199198273203824, "grad_norm": 3.6184287071228027, "learning_rate": 7.953652920087884e-06, "loss": 0.9629, "step": 4150 }, { "epoch": 0.31999691643539935, "grad_norm": 3.602494478225708, "learning_rate": 7.952645494803145e-06, "loss": 1.0702, "step": 4151 }, { "epoch": 0.32007400555041626, "grad_norm": 4.118215084075928, "learning_rate": 7.95163788543625e-06, "loss": 0.9996, "step": 4152 }, { "epoch": 0.32015109466543323, "grad_norm": 3.864248037338257, "learning_rate": 7.950630092050022e-06, "loss": 1.0801, "step": 4153 }, { "epoch": 0.3202281837804502, "grad_norm": 3.5450005531311035, "learning_rate": 7.949622114707288e-06, "loss": 0.9193, "step": 4154 }, { "epoch": 0.3203052728954672, "grad_norm": 3.6530864238739014, "learning_rate": 7.948613953470892e-06, "loss": 0.8586, "step": 4155 }, { "epoch": 0.32038236201048415, "grad_norm": 3.8235666751861572, "learning_rate": 7.947605608403688e-06, "loss": 0.9473, "step": 4156 }, { "epoch": 0.32045945112550106, "grad_norm": 4.016197681427002, "learning_rate": 7.946597079568538e-06, "loss": 1.0146, "step": 4157 }, { "epoch": 0.32053654024051803, "grad_norm": 3.6410627365112305, "learning_rate": 7.945588367028324e-06, "loss": 0.9481, "step": 4158 }, { "epoch": 0.320613629355535, "grad_norm": 5.766059875488281, "learning_rate": 7.94457947084593e-06, "loss": 0.9786, "step": 4159 }, { "epoch": 0.32069071847055197, "grad_norm": 3.8257815837860107, "learning_rate": 7.943570391084254e-06, "loss": 1.1402, "step": 4160 }, { "epoch": 0.32076780758556894, "grad_norm": 
4.60271692276001, "learning_rate": 7.942561127806212e-06, "loss": 1.0461, "step": 4161 }, { "epoch": 0.32084489670058586, "grad_norm": 3.0530004501342773, "learning_rate": 7.941551681074723e-06, "loss": 0.8957, "step": 4162 }, { "epoch": 0.32092198581560283, "grad_norm": 3.847745180130005, "learning_rate": 7.94054205095272e-06, "loss": 0.9406, "step": 4163 }, { "epoch": 0.3209990749306198, "grad_norm": 3.877429962158203, "learning_rate": 7.939532237503151e-06, "loss": 0.9846, "step": 4164 }, { "epoch": 0.32107616404563677, "grad_norm": 3.6080520153045654, "learning_rate": 7.93852224078897e-06, "loss": 1.0752, "step": 4165 }, { "epoch": 0.32115325316065374, "grad_norm": 3.4427030086517334, "learning_rate": 7.937512060873149e-06, "loss": 0.9899, "step": 4166 }, { "epoch": 0.32123034227567066, "grad_norm": 3.60064697265625, "learning_rate": 7.936501697818661e-06, "loss": 0.9592, "step": 4167 }, { "epoch": 0.3213074313906876, "grad_norm": 3.334470510482788, "learning_rate": 7.935491151688504e-06, "loss": 1.0138, "step": 4168 }, { "epoch": 0.3213845205057046, "grad_norm": 3.6496033668518066, "learning_rate": 7.934480422545674e-06, "loss": 1.0665, "step": 4169 }, { "epoch": 0.32146160962072157, "grad_norm": 3.650097131729126, "learning_rate": 7.933469510453189e-06, "loss": 1.085, "step": 4170 }, { "epoch": 0.32153869873573854, "grad_norm": 3.6096320152282715, "learning_rate": 7.932458415474073e-06, "loss": 0.9372, "step": 4171 }, { "epoch": 0.32161578785075545, "grad_norm": 4.109353065490723, "learning_rate": 7.931447137671364e-06, "loss": 1.0568, "step": 4172 }, { "epoch": 0.3216928769657724, "grad_norm": 3.8872742652893066, "learning_rate": 7.930435677108106e-06, "loss": 1.0137, "step": 4173 }, { "epoch": 0.3217699660807894, "grad_norm": 4.055763244628906, "learning_rate": 7.929424033847362e-06, "loss": 1.0044, "step": 4174 }, { "epoch": 0.32184705519580636, "grad_norm": 3.5979840755462646, "learning_rate": 7.9284122079522e-06, "loss": 0.982, "step": 4175 }, { "epoch": 0.32192414431082333, "grad_norm": 3.5764944553375244, "learning_rate": 7.927400199485705e-06, "loss": 0.9204, "step": 4176 }, { "epoch": 0.32200123342584025, "grad_norm": 3.7426390647888184, "learning_rate": 7.926388008510968e-06, "loss": 0.9989, "step": 4177 }, { "epoch": 0.3220783225408572, "grad_norm": 3.4947009086608887, "learning_rate": 7.925375635091094e-06, "loss": 0.959, "step": 4178 }, { "epoch": 0.3221554116558742, "grad_norm": 4.316697597503662, "learning_rate": 7.924363079289203e-06, "loss": 1.0835, "step": 4179 }, { "epoch": 0.32223250077089116, "grad_norm": 4.12495756149292, "learning_rate": 7.923350341168416e-06, "loss": 1.0846, "step": 4180 }, { "epoch": 0.32230958988590813, "grad_norm": 3.6122055053710938, "learning_rate": 7.922337420791879e-06, "loss": 0.8974, "step": 4181 }, { "epoch": 0.32238667900092505, "grad_norm": 3.608186960220337, "learning_rate": 7.921324318222737e-06, "loss": 1.066, "step": 4182 }, { "epoch": 0.322463768115942, "grad_norm": 3.8409266471862793, "learning_rate": 7.920311033524156e-06, "loss": 1.0132, "step": 4183 }, { "epoch": 0.322540857230959, "grad_norm": 4.176751136779785, "learning_rate": 7.919297566759304e-06, "loss": 1.0244, "step": 4184 }, { "epoch": 0.32261794634597596, "grad_norm": 3.635542869567871, "learning_rate": 7.918283917991367e-06, "loss": 1.0494, "step": 4185 }, { "epoch": 0.32269503546099293, "grad_norm": 3.756164073944092, "learning_rate": 7.917270087283544e-06, "loss": 1.0204, "step": 4186 }, { "epoch": 0.32277212457600984, "grad_norm": 3.905221939086914, 
"learning_rate": 7.91625607469904e-06, "loss": 0.9397, "step": 4187 }, { "epoch": 0.3228492136910268, "grad_norm": 3.6469991207122803, "learning_rate": 7.915241880301075e-06, "loss": 1.0839, "step": 4188 }, { "epoch": 0.3229263028060438, "grad_norm": 3.7983455657958984, "learning_rate": 7.914227504152874e-06, "loss": 0.9868, "step": 4189 }, { "epoch": 0.32300339192106076, "grad_norm": 3.3824074268341064, "learning_rate": 7.913212946317684e-06, "loss": 0.8911, "step": 4190 }, { "epoch": 0.3230804810360777, "grad_norm": 3.6762890815734863, "learning_rate": 7.912198206858752e-06, "loss": 1.0045, "step": 4191 }, { "epoch": 0.32315757015109464, "grad_norm": 4.075868129730225, "learning_rate": 7.911183285839347e-06, "loss": 1.0257, "step": 4192 }, { "epoch": 0.3232346592661116, "grad_norm": 3.5539023876190186, "learning_rate": 7.91016818332274e-06, "loss": 1.0092, "step": 4193 }, { "epoch": 0.3233117483811286, "grad_norm": 3.818380355834961, "learning_rate": 7.909152899372218e-06, "loss": 1.097, "step": 4194 }, { "epoch": 0.32338883749614555, "grad_norm": 3.38431453704834, "learning_rate": 7.908137434051083e-06, "loss": 0.896, "step": 4195 }, { "epoch": 0.3234659266111625, "grad_norm": 3.587066888809204, "learning_rate": 7.907121787422638e-06, "loss": 0.9987, "step": 4196 }, { "epoch": 0.32354301572617944, "grad_norm": 4.137657165527344, "learning_rate": 7.906105959550206e-06, "loss": 0.9355, "step": 4197 }, { "epoch": 0.3236201048411964, "grad_norm": 3.3995325565338135, "learning_rate": 7.90508995049712e-06, "loss": 0.9642, "step": 4198 }, { "epoch": 0.3236971939562134, "grad_norm": 3.7008519172668457, "learning_rate": 7.90407376032672e-06, "loss": 1.0123, "step": 4199 }, { "epoch": 0.32377428307123035, "grad_norm": 3.6094677448272705, "learning_rate": 7.903057389102361e-06, "loss": 0.9955, "step": 4200 }, { "epoch": 0.3238513721862473, "grad_norm": 3.729969024658203, "learning_rate": 7.902040836887413e-06, "loss": 0.9154, "step": 4201 }, { "epoch": 0.32392846130126424, "grad_norm": 3.7781825065612793, "learning_rate": 7.901024103745244e-06, "loss": 1.0113, "step": 4202 }, { "epoch": 0.3240055504162812, "grad_norm": 3.7726991176605225, "learning_rate": 7.90000718973925e-06, "loss": 1.0299, "step": 4203 }, { "epoch": 0.3240826395312982, "grad_norm": 3.447260618209839, "learning_rate": 7.898990094932826e-06, "loss": 1.0302, "step": 4204 }, { "epoch": 0.32415972864631515, "grad_norm": 3.4710397720336914, "learning_rate": 7.897972819389385e-06, "loss": 0.8961, "step": 4205 }, { "epoch": 0.3242368177613321, "grad_norm": 3.3661441802978516, "learning_rate": 7.896955363172347e-06, "loss": 0.9486, "step": 4206 }, { "epoch": 0.32431390687634903, "grad_norm": 3.684475898742676, "learning_rate": 7.895937726345145e-06, "loss": 0.9692, "step": 4207 }, { "epoch": 0.324390995991366, "grad_norm": 3.5266542434692383, "learning_rate": 7.894919908971225e-06, "loss": 1.0262, "step": 4208 }, { "epoch": 0.324468085106383, "grad_norm": 3.6420974731445312, "learning_rate": 7.893901911114041e-06, "loss": 1.0388, "step": 4209 }, { "epoch": 0.32454517422139995, "grad_norm": 3.7208940982818604, "learning_rate": 7.892883732837062e-06, "loss": 0.9965, "step": 4210 }, { "epoch": 0.3246222633364169, "grad_norm": 3.839670419692993, "learning_rate": 7.891865374203765e-06, "loss": 1.0087, "step": 4211 }, { "epoch": 0.32469935245143383, "grad_norm": 3.901395559310913, "learning_rate": 7.890846835277638e-06, "loss": 0.9884, "step": 4212 }, { "epoch": 0.3247764415664508, "grad_norm": 3.6374828815460205, "learning_rate": 
7.889828116122183e-06, "loss": 1.0152, "step": 4213 }, { "epoch": 0.3248535306814678, "grad_norm": 4.1036763191223145, "learning_rate": 7.888809216800913e-06, "loss": 1.0062, "step": 4214 }, { "epoch": 0.32493061979648474, "grad_norm": 3.640856981277466, "learning_rate": 7.887790137377348e-06, "loss": 0.9845, "step": 4215 }, { "epoch": 0.3250077089115017, "grad_norm": 3.1671056747436523, "learning_rate": 7.886770877915027e-06, "loss": 0.892, "step": 4216 }, { "epoch": 0.32508479802651863, "grad_norm": 3.5967583656311035, "learning_rate": 7.885751438477489e-06, "loss": 1.0032, "step": 4217 }, { "epoch": 0.3251618871415356, "grad_norm": 3.60170316696167, "learning_rate": 7.884731819128298e-06, "loss": 0.9933, "step": 4218 }, { "epoch": 0.32523897625655257, "grad_norm": 3.8272790908813477, "learning_rate": 7.883712019931017e-06, "loss": 1.0296, "step": 4219 }, { "epoch": 0.32531606537156954, "grad_norm": 4.294167995452881, "learning_rate": 7.882692040949226e-06, "loss": 1.1175, "step": 4220 }, { "epoch": 0.3253931544865865, "grad_norm": 3.4372246265411377, "learning_rate": 7.881671882246518e-06, "loss": 1.0131, "step": 4221 }, { "epoch": 0.3254702436016034, "grad_norm": 4.079759120941162, "learning_rate": 7.880651543886491e-06, "loss": 0.9886, "step": 4222 }, { "epoch": 0.3255473327166204, "grad_norm": 5.348727226257324, "learning_rate": 7.87963102593276e-06, "loss": 0.9807, "step": 4223 }, { "epoch": 0.32562442183163737, "grad_norm": 3.5309953689575195, "learning_rate": 7.878610328448948e-06, "loss": 0.9316, "step": 4224 }, { "epoch": 0.32570151094665434, "grad_norm": 3.289616346359253, "learning_rate": 7.877589451498692e-06, "loss": 0.9425, "step": 4225 }, { "epoch": 0.3257786000616713, "grad_norm": 3.5728421211242676, "learning_rate": 7.876568395145636e-06, "loss": 0.8591, "step": 4226 }, { "epoch": 0.3258556891766883, "grad_norm": 3.5462687015533447, "learning_rate": 7.87554715945344e-06, "loss": 0.875, "step": 4227 }, { "epoch": 0.3259327782917052, "grad_norm": 3.9166672229766846, "learning_rate": 7.87452574448577e-06, "loss": 1.0411, "step": 4228 }, { "epoch": 0.32600986740672216, "grad_norm": 3.8413209915161133, "learning_rate": 7.873504150306308e-06, "loss": 0.9692, "step": 4229 }, { "epoch": 0.32608695652173914, "grad_norm": 4.090485095977783, "learning_rate": 7.872482376978746e-06, "loss": 1.0688, "step": 4230 }, { "epoch": 0.3261640456367561, "grad_norm": 3.474355697631836, "learning_rate": 7.87146042456678e-06, "loss": 1.0016, "step": 4231 }, { "epoch": 0.3262411347517731, "grad_norm": 3.8390350341796875, "learning_rate": 7.870438293134133e-06, "loss": 1.0577, "step": 4232 }, { "epoch": 0.32631822386679, "grad_norm": 4.005039691925049, "learning_rate": 7.869415982744524e-06, "loss": 0.8633, "step": 4233 }, { "epoch": 0.32639531298180696, "grad_norm": 3.2557785511016846, "learning_rate": 7.868393493461688e-06, "loss": 0.8518, "step": 4234 }, { "epoch": 0.32647240209682393, "grad_norm": 3.4779703617095947, "learning_rate": 7.867370825349375e-06, "loss": 1.043, "step": 4235 }, { "epoch": 0.3265494912118409, "grad_norm": 3.7263035774230957, "learning_rate": 7.86634797847134e-06, "loss": 1.0086, "step": 4236 }, { "epoch": 0.3266265803268579, "grad_norm": 3.4993364810943604, "learning_rate": 7.865324952891354e-06, "loss": 0.9986, "step": 4237 }, { "epoch": 0.3267036694418748, "grad_norm": 31.114559173583984, "learning_rate": 7.864301748673197e-06, "loss": 1.041, "step": 4238 }, { "epoch": 0.32678075855689176, "grad_norm": 3.9260265827178955, "learning_rate": 7.863278365880662e-06, 
"loss": 1.1013, "step": 4239 }, { "epoch": 0.32685784767190873, "grad_norm": 3.4418816566467285, "learning_rate": 7.862254804577549e-06, "loss": 0.9764, "step": 4240 }, { "epoch": 0.3269349367869257, "grad_norm": 3.3301095962524414, "learning_rate": 7.861231064827673e-06, "loss": 1.0296, "step": 4241 }, { "epoch": 0.32701202590194267, "grad_norm": 3.416653633117676, "learning_rate": 7.86020714669486e-06, "loss": 1.0018, "step": 4242 }, { "epoch": 0.3270891150169596, "grad_norm": 3.7522192001342773, "learning_rate": 7.859183050242945e-06, "loss": 0.8415, "step": 4243 }, { "epoch": 0.32716620413197656, "grad_norm": 3.878190279006958, "learning_rate": 7.858158775535773e-06, "loss": 1.0617, "step": 4244 }, { "epoch": 0.3272432932469935, "grad_norm": 3.7095284461975098, "learning_rate": 7.857134322637205e-06, "loss": 1.0556, "step": 4245 }, { "epoch": 0.3273203823620105, "grad_norm": 3.8554065227508545, "learning_rate": 7.85610969161111e-06, "loss": 1.0377, "step": 4246 }, { "epoch": 0.32739747147702747, "grad_norm": 3.8045499324798584, "learning_rate": 7.855084882521366e-06, "loss": 1.073, "step": 4247 }, { "epoch": 0.3274745605920444, "grad_norm": 4.000002384185791, "learning_rate": 7.854059895431869e-06, "loss": 1.0777, "step": 4248 }, { "epoch": 0.32755164970706135, "grad_norm": 4.595569610595703, "learning_rate": 7.853034730406516e-06, "loss": 0.9705, "step": 4249 }, { "epoch": 0.3276287388220783, "grad_norm": 3.743229627609253, "learning_rate": 7.852009387509227e-06, "loss": 1.0785, "step": 4250 }, { "epoch": 0.3277058279370953, "grad_norm": 3.717169761657715, "learning_rate": 7.850983866803923e-06, "loss": 1.0172, "step": 4251 }, { "epoch": 0.32778291705211227, "grad_norm": 3.679903984069824, "learning_rate": 7.849958168354538e-06, "loss": 0.9691, "step": 4252 }, { "epoch": 0.3278600061671292, "grad_norm": 3.682389497756958, "learning_rate": 7.848932292225025e-06, "loss": 1.0234, "step": 4253 }, { "epoch": 0.32793709528214615, "grad_norm": 4.109451770782471, "learning_rate": 7.847906238479337e-06, "loss": 1.0486, "step": 4254 }, { "epoch": 0.3280141843971631, "grad_norm": 4.467040061950684, "learning_rate": 7.846880007181443e-06, "loss": 1.0206, "step": 4255 }, { "epoch": 0.3280912735121801, "grad_norm": 3.9203736782073975, "learning_rate": 7.845853598395327e-06, "loss": 0.9341, "step": 4256 }, { "epoch": 0.32816836262719706, "grad_norm": 3.5691542625427246, "learning_rate": 7.844827012184978e-06, "loss": 0.9578, "step": 4257 }, { "epoch": 0.328245451742214, "grad_norm": 3.7378454208374023, "learning_rate": 7.843800248614396e-06, "loss": 1.04, "step": 4258 }, { "epoch": 0.32832254085723095, "grad_norm": 3.8135650157928467, "learning_rate": 7.8427733077476e-06, "loss": 1.021, "step": 4259 }, { "epoch": 0.3283996299722479, "grad_norm": 3.978492021560669, "learning_rate": 7.84174618964861e-06, "loss": 1.0348, "step": 4260 }, { "epoch": 0.3284767190872649, "grad_norm": 3.6482443809509277, "learning_rate": 7.840718894381464e-06, "loss": 0.9996, "step": 4261 }, { "epoch": 0.32855380820228186, "grad_norm": 3.369802474975586, "learning_rate": 7.839691422010208e-06, "loss": 0.9249, "step": 4262 }, { "epoch": 0.3286308973172988, "grad_norm": 3.726884603500366, "learning_rate": 7.838663772598897e-06, "loss": 0.9568, "step": 4263 }, { "epoch": 0.32870798643231575, "grad_norm": 3.5390725135803223, "learning_rate": 7.837635946211603e-06, "loss": 1.0817, "step": 4264 }, { "epoch": 0.3287850755473327, "grad_norm": 4.038122177124023, "learning_rate": 7.836607942912403e-06, "loss": 1.0059, "step": 4265 }, 
{ "epoch": 0.3288621646623497, "grad_norm": 3.1766395568847656, "learning_rate": 7.83557976276539e-06, "loss": 0.8842, "step": 4266 }, { "epoch": 0.32893925377736666, "grad_norm": 3.975059986114502, "learning_rate": 7.834551405834665e-06, "loss": 1.1503, "step": 4267 }, { "epoch": 0.3290163428923836, "grad_norm": 4.144785404205322, "learning_rate": 7.833522872184338e-06, "loss": 0.974, "step": 4268 }, { "epoch": 0.32909343200740054, "grad_norm": 3.740612268447876, "learning_rate": 7.832494161878537e-06, "loss": 0.956, "step": 4269 }, { "epoch": 0.3291705211224175, "grad_norm": 3.4083251953125, "learning_rate": 7.831465274981395e-06, "loss": 0.9164, "step": 4270 }, { "epoch": 0.3292476102374345, "grad_norm": 3.277799367904663, "learning_rate": 7.830436211557057e-06, "loss": 0.9708, "step": 4271 }, { "epoch": 0.32932469935245146, "grad_norm": 3.846964120864868, "learning_rate": 7.82940697166968e-06, "loss": 0.9867, "step": 4272 }, { "epoch": 0.32940178846746837, "grad_norm": 3.877805233001709, "learning_rate": 7.828377555383433e-06, "loss": 1.0738, "step": 4273 }, { "epoch": 0.32947887758248534, "grad_norm": 3.754934310913086, "learning_rate": 7.827347962762495e-06, "loss": 1.0322, "step": 4274 }, { "epoch": 0.3295559666975023, "grad_norm": 4.1259284019470215, "learning_rate": 7.826318193871052e-06, "loss": 1.0407, "step": 4275 }, { "epoch": 0.3296330558125193, "grad_norm": 4.191363334655762, "learning_rate": 7.825288248773309e-06, "loss": 1.0295, "step": 4276 }, { "epoch": 0.32971014492753625, "grad_norm": 3.45347261428833, "learning_rate": 7.824258127533477e-06, "loss": 0.9255, "step": 4277 }, { "epoch": 0.32978723404255317, "grad_norm": 3.499629497528076, "learning_rate": 7.823227830215776e-06, "loss": 1.0538, "step": 4278 }, { "epoch": 0.32986432315757014, "grad_norm": 4.018231391906738, "learning_rate": 7.822197356884442e-06, "loss": 1.0472, "step": 4279 }, { "epoch": 0.3299414122725871, "grad_norm": 3.717026472091675, "learning_rate": 7.82116670760372e-06, "loss": 0.9186, "step": 4280 }, { "epoch": 0.3300185013876041, "grad_norm": 4.538501262664795, "learning_rate": 7.820135882437866e-06, "loss": 1.1079, "step": 4281 }, { "epoch": 0.33009559050262105, "grad_norm": 3.6810877323150635, "learning_rate": 7.819104881451145e-06, "loss": 0.9466, "step": 4282 }, { "epoch": 0.33017267961763797, "grad_norm": 3.926638603210449, "learning_rate": 7.818073704707834e-06, "loss": 0.9839, "step": 4283 }, { "epoch": 0.33024976873265494, "grad_norm": 3.823995590209961, "learning_rate": 7.817042352272224e-06, "loss": 1.0753, "step": 4284 }, { "epoch": 0.3303268578476719, "grad_norm": 3.6535205841064453, "learning_rate": 7.816010824208613e-06, "loss": 1.0285, "step": 4285 }, { "epoch": 0.3304039469626889, "grad_norm": 3.4686245918273926, "learning_rate": 7.814979120581311e-06, "loss": 0.9721, "step": 4286 }, { "epoch": 0.33048103607770585, "grad_norm": 3.8238701820373535, "learning_rate": 7.81394724145464e-06, "loss": 1.0513, "step": 4287 }, { "epoch": 0.33055812519272276, "grad_norm": 3.5260727405548096, "learning_rate": 7.812915186892933e-06, "loss": 0.9242, "step": 4288 }, { "epoch": 0.33063521430773973, "grad_norm": 3.374612331390381, "learning_rate": 7.811882956960532e-06, "loss": 0.984, "step": 4289 }, { "epoch": 0.3307123034227567, "grad_norm": 3.7520065307617188, "learning_rate": 7.810850551721793e-06, "loss": 0.9958, "step": 4290 }, { "epoch": 0.3307893925377737, "grad_norm": 3.351409435272217, "learning_rate": 7.809817971241079e-06, "loss": 0.9151, "step": 4291 }, { "epoch": 0.33086648165279064, 
"grad_norm": 3.3581125736236572, "learning_rate": 7.808785215582766e-06, "loss": 0.9481, "step": 4292 }, { "epoch": 0.33094357076780756, "grad_norm": 3.93084716796875, "learning_rate": 7.807752284811243e-06, "loss": 1.1273, "step": 4293 }, { "epoch": 0.33102065988282453, "grad_norm": 4.633861064910889, "learning_rate": 7.806719178990906e-06, "loss": 1.0091, "step": 4294 }, { "epoch": 0.3310977489978415, "grad_norm": 3.382524013519287, "learning_rate": 7.805685898186164e-06, "loss": 0.958, "step": 4295 }, { "epoch": 0.33117483811285847, "grad_norm": 3.2215330600738525, "learning_rate": 7.804652442461438e-06, "loss": 0.9924, "step": 4296 }, { "epoch": 0.33125192722787544, "grad_norm": 3.757098913192749, "learning_rate": 7.80361881188116e-06, "loss": 1.0064, "step": 4297 }, { "epoch": 0.33132901634289236, "grad_norm": 3.6601150035858154, "learning_rate": 7.802585006509766e-06, "loss": 0.9142, "step": 4298 }, { "epoch": 0.33140610545790933, "grad_norm": 4.117657661437988, "learning_rate": 7.801551026411715e-06, "loss": 1.1443, "step": 4299 }, { "epoch": 0.3314831945729263, "grad_norm": 3.925748109817505, "learning_rate": 7.800516871651465e-06, "loss": 0.9226, "step": 4300 }, { "epoch": 0.33156028368794327, "grad_norm": 3.6940126419067383, "learning_rate": 7.799482542293491e-06, "loss": 1.1027, "step": 4301 }, { "epoch": 0.33163737280296024, "grad_norm": 3.6253130435943604, "learning_rate": 7.798448038402283e-06, "loss": 0.9479, "step": 4302 }, { "epoch": 0.33171446191797715, "grad_norm": 3.360285758972168, "learning_rate": 7.79741336004233e-06, "loss": 0.9113, "step": 4303 }, { "epoch": 0.3317915510329941, "grad_norm": 3.5105605125427246, "learning_rate": 7.796378507278144e-06, "loss": 0.9038, "step": 4304 }, { "epoch": 0.3318686401480111, "grad_norm": 4.047402858734131, "learning_rate": 7.79534348017424e-06, "loss": 0.9321, "step": 4305 }, { "epoch": 0.33194572926302807, "grad_norm": 3.2786002159118652, "learning_rate": 7.794308278795148e-06, "loss": 0.9316, "step": 4306 }, { "epoch": 0.33202281837804504, "grad_norm": 3.5382752418518066, "learning_rate": 7.793272903205406e-06, "loss": 1.0196, "step": 4307 }, { "epoch": 0.33209990749306195, "grad_norm": 3.999952793121338, "learning_rate": 7.792237353469567e-06, "loss": 0.9702, "step": 4308 }, { "epoch": 0.3321769966080789, "grad_norm": 3.6367757320404053, "learning_rate": 7.791201629652189e-06, "loss": 0.9877, "step": 4309 }, { "epoch": 0.3322540857230959, "grad_norm": 5.327296733856201, "learning_rate": 7.790165731817847e-06, "loss": 0.8918, "step": 4310 }, { "epoch": 0.33233117483811286, "grad_norm": 3.5317442417144775, "learning_rate": 7.789129660031121e-06, "loss": 0.9834, "step": 4311 }, { "epoch": 0.33240826395312983, "grad_norm": 3.6593918800354004, "learning_rate": 7.788093414356605e-06, "loss": 1.0577, "step": 4312 }, { "epoch": 0.3324853530681468, "grad_norm": 3.4909262657165527, "learning_rate": 7.787056994858906e-06, "loss": 0.9607, "step": 4313 }, { "epoch": 0.3325624421831637, "grad_norm": 3.5621049404144287, "learning_rate": 7.786020401602638e-06, "loss": 0.9476, "step": 4314 }, { "epoch": 0.3326395312981807, "grad_norm": 3.4073727130889893, "learning_rate": 7.784983634652425e-06, "loss": 1.0184, "step": 4315 }, { "epoch": 0.33271662041319766, "grad_norm": 4.091675281524658, "learning_rate": 7.783946694072908e-06, "loss": 1.0435, "step": 4316 }, { "epoch": 0.33279370952821463, "grad_norm": 3.9657843112945557, "learning_rate": 7.782909579928733e-06, "loss": 1.0276, "step": 4317 }, { "epoch": 0.3328707986432316, "grad_norm": 
3.8811726570129395, "learning_rate": 7.78187229228456e-06, "loss": 1.022, "step": 4318 }, { "epoch": 0.3329478877582485, "grad_norm": 4.559733867645264, "learning_rate": 7.780834831205056e-06, "loss": 1.0362, "step": 4319 }, { "epoch": 0.3330249768732655, "grad_norm": 3.447314500808716, "learning_rate": 7.779797196754901e-06, "loss": 0.9834, "step": 4320 }, { "epoch": 0.33310206598828246, "grad_norm": 3.546065330505371, "learning_rate": 7.77875938899879e-06, "loss": 0.9406, "step": 4321 }, { "epoch": 0.33317915510329943, "grad_norm": 3.527249336242676, "learning_rate": 7.777721408001421e-06, "loss": 1.0047, "step": 4322 }, { "epoch": 0.3332562442183164, "grad_norm": 3.692897319793701, "learning_rate": 7.77668325382751e-06, "loss": 1.1071, "step": 4323 }, { "epoch": 0.3333333333333333, "grad_norm": 3.6986513137817383, "learning_rate": 7.775644926541778e-06, "loss": 0.9281, "step": 4324 }, { "epoch": 0.3334104224483503, "grad_norm": 3.8168718814849854, "learning_rate": 7.77460642620896e-06, "loss": 1.0708, "step": 4325 }, { "epoch": 0.33348751156336726, "grad_norm": 3.7896907329559326, "learning_rate": 7.773567752893803e-06, "loss": 0.9714, "step": 4326 }, { "epoch": 0.3335646006783842, "grad_norm": 3.77909255027771, "learning_rate": 7.772528906661059e-06, "loss": 0.9923, "step": 4327 }, { "epoch": 0.3336416897934012, "grad_norm": 3.9389283657073975, "learning_rate": 7.771489887575498e-06, "loss": 1.0688, "step": 4328 }, { "epoch": 0.3337187789084181, "grad_norm": 3.601837635040283, "learning_rate": 7.770450695701896e-06, "loss": 1.0515, "step": 4329 }, { "epoch": 0.3337958680234351, "grad_norm": 4.004146099090576, "learning_rate": 7.769411331105044e-06, "loss": 1.0052, "step": 4330 }, { "epoch": 0.33387295713845205, "grad_norm": 3.7516090869903564, "learning_rate": 7.768371793849736e-06, "loss": 1.0585, "step": 4331 }, { "epoch": 0.333950046253469, "grad_norm": 3.6058356761932373, "learning_rate": 7.767332084000784e-06, "loss": 1.0234, "step": 4332 }, { "epoch": 0.334027135368486, "grad_norm": 3.795377016067505, "learning_rate": 7.766292201623012e-06, "loss": 1.0903, "step": 4333 }, { "epoch": 0.3341042244835029, "grad_norm": 3.492107391357422, "learning_rate": 7.765252146781245e-06, "loss": 0.8837, "step": 4334 }, { "epoch": 0.3341813135985199, "grad_norm": 3.525789976119995, "learning_rate": 7.76421191954033e-06, "loss": 0.8463, "step": 4335 }, { "epoch": 0.33425840271353685, "grad_norm": 4.0740509033203125, "learning_rate": 7.763171519965118e-06, "loss": 1.152, "step": 4336 }, { "epoch": 0.3343354918285538, "grad_norm": 3.6718363761901855, "learning_rate": 7.762130948120472e-06, "loss": 0.9143, "step": 4337 }, { "epoch": 0.3344125809435708, "grad_norm": 3.7916324138641357, "learning_rate": 7.761090204071267e-06, "loss": 0.9976, "step": 4338 }, { "epoch": 0.3344896700585877, "grad_norm": 3.5439155101776123, "learning_rate": 7.760049287882388e-06, "loss": 0.9737, "step": 4339 }, { "epoch": 0.3345667591736047, "grad_norm": 3.6218690872192383, "learning_rate": 7.75900819961873e-06, "loss": 0.9956, "step": 4340 }, { "epoch": 0.33464384828862165, "grad_norm": 3.703711986541748, "learning_rate": 7.757966939345201e-06, "loss": 0.854, "step": 4341 }, { "epoch": 0.3347209374036386, "grad_norm": 3.890967845916748, "learning_rate": 7.756925507126717e-06, "loss": 1.0805, "step": 4342 }, { "epoch": 0.3347980265186556, "grad_norm": 3.6906652450561523, "learning_rate": 7.755883903028205e-06, "loss": 1.0733, "step": 4343 }, { "epoch": 0.3348751156336725, "grad_norm": 3.4065797328948975, "learning_rate": 
7.754842127114606e-06, "loss": 0.9758, "step": 4344 }, { "epoch": 0.3349522047486895, "grad_norm": 3.7064595222473145, "learning_rate": 7.753800179450867e-06, "loss": 1.0737, "step": 4345 }, { "epoch": 0.33502929386370645, "grad_norm": 3.8116419315338135, "learning_rate": 7.752758060101951e-06, "loss": 1.0783, "step": 4346 }, { "epoch": 0.3351063829787234, "grad_norm": 3.957881450653076, "learning_rate": 7.751715769132823e-06, "loss": 1.0667, "step": 4347 }, { "epoch": 0.3351834720937404, "grad_norm": 3.905587911605835, "learning_rate": 7.750673306608473e-06, "loss": 1.0612, "step": 4348 }, { "epoch": 0.3352605612087573, "grad_norm": 4.058816432952881, "learning_rate": 7.749630672593886e-06, "loss": 1.2289, "step": 4349 }, { "epoch": 0.3353376503237743, "grad_norm": 3.449509382247925, "learning_rate": 7.748587867154068e-06, "loss": 0.9291, "step": 4350 }, { "epoch": 0.33541473943879124, "grad_norm": 3.368961811065674, "learning_rate": 7.747544890354031e-06, "loss": 0.9528, "step": 4351 }, { "epoch": 0.3354918285538082, "grad_norm": 3.8779165744781494, "learning_rate": 7.7465017422588e-06, "loss": 1.075, "step": 4352 }, { "epoch": 0.3355689176688252, "grad_norm": 3.9211997985839844, "learning_rate": 7.745458422933409e-06, "loss": 0.9959, "step": 4353 }, { "epoch": 0.3356460067838421, "grad_norm": 3.9596736431121826, "learning_rate": 7.744414932442908e-06, "loss": 1.0908, "step": 4354 }, { "epoch": 0.33572309589885907, "grad_norm": 4.090301036834717, "learning_rate": 7.743371270852346e-06, "loss": 1.2187, "step": 4355 }, { "epoch": 0.33580018501387604, "grad_norm": 3.7745120525360107, "learning_rate": 7.742327438226796e-06, "loss": 0.9202, "step": 4356 }, { "epoch": 0.335877274128893, "grad_norm": 3.6797232627868652, "learning_rate": 7.74128343463133e-06, "loss": 0.8468, "step": 4357 }, { "epoch": 0.33595436324391, "grad_norm": 3.7442944049835205, "learning_rate": 7.740239260131042e-06, "loss": 0.974, "step": 4358 }, { "epoch": 0.3360314523589269, "grad_norm": 3.405045509338379, "learning_rate": 7.739194914791028e-06, "loss": 0.9661, "step": 4359 }, { "epoch": 0.33610854147394387, "grad_norm": 3.6131653785705566, "learning_rate": 7.738150398676397e-06, "loss": 1.014, "step": 4360 }, { "epoch": 0.33618563058896084, "grad_norm": 4.105853080749512, "learning_rate": 7.73710571185227e-06, "loss": 1.2013, "step": 4361 }, { "epoch": 0.3362627197039778, "grad_norm": 3.5816614627838135, "learning_rate": 7.736060854383778e-06, "loss": 0.8909, "step": 4362 }, { "epoch": 0.3363398088189948, "grad_norm": 3.8828015327453613, "learning_rate": 7.735015826336064e-06, "loss": 1.0176, "step": 4363 }, { "epoch": 0.3364168979340117, "grad_norm": 3.9706432819366455, "learning_rate": 7.733970627774275e-06, "loss": 1.0422, "step": 4364 }, { "epoch": 0.33649398704902866, "grad_norm": 4.061374664306641, "learning_rate": 7.732925258763577e-06, "loss": 0.9859, "step": 4365 }, { "epoch": 0.33657107616404563, "grad_norm": 3.8068432807922363, "learning_rate": 7.731879719369145e-06, "loss": 1.1827, "step": 4366 }, { "epoch": 0.3366481652790626, "grad_norm": 3.7083537578582764, "learning_rate": 7.73083400965616e-06, "loss": 0.994, "step": 4367 }, { "epoch": 0.3367252543940796, "grad_norm": 3.8531298637390137, "learning_rate": 7.72978812968982e-06, "loss": 0.8946, "step": 4368 }, { "epoch": 0.3368023435090965, "grad_norm": 3.4262845516204834, "learning_rate": 7.728742079535326e-06, "loss": 0.9598, "step": 4369 }, { "epoch": 0.33687943262411346, "grad_norm": 3.567720413208008, "learning_rate": 7.727695859257896e-06, "loss": 
0.9415, "step": 4370 }, { "epoch": 0.33695652173913043, "grad_norm": 3.5674967765808105, "learning_rate": 7.726649468922756e-06, "loss": 0.9562, "step": 4371 }, { "epoch": 0.3370336108541474, "grad_norm": 4.217917442321777, "learning_rate": 7.725602908595144e-06, "loss": 1.0205, "step": 4372 }, { "epoch": 0.3371106999691644, "grad_norm": 3.5196731090545654, "learning_rate": 7.724556178340307e-06, "loss": 1.0654, "step": 4373 }, { "epoch": 0.3371877890841813, "grad_norm": 4.0760369300842285, "learning_rate": 7.723509278223504e-06, "loss": 0.9772, "step": 4374 }, { "epoch": 0.33726487819919826, "grad_norm": 3.8422629833221436, "learning_rate": 7.722462208310002e-06, "loss": 0.9518, "step": 4375 }, { "epoch": 0.33734196731421523, "grad_norm": 3.41591739654541, "learning_rate": 7.721414968665081e-06, "loss": 1.0063, "step": 4376 }, { "epoch": 0.3374190564292322, "grad_norm": 3.6463801860809326, "learning_rate": 7.720367559354032e-06, "loss": 1.0955, "step": 4377 }, { "epoch": 0.33749614554424917, "grad_norm": 4.359663963317871, "learning_rate": 7.719319980442154e-06, "loss": 0.9804, "step": 4378 }, { "epoch": 0.3375732346592661, "grad_norm": 3.783148765563965, "learning_rate": 7.718272231994759e-06, "loss": 0.9911, "step": 4379 }, { "epoch": 0.33765032377428306, "grad_norm": 4.130781173706055, "learning_rate": 7.71722431407717e-06, "loss": 1.0291, "step": 4380 }, { "epoch": 0.3377274128893, "grad_norm": 3.954774856567383, "learning_rate": 7.716176226754716e-06, "loss": 0.9907, "step": 4381 }, { "epoch": 0.337804502004317, "grad_norm": 3.640958547592163, "learning_rate": 7.715127970092742e-06, "loss": 0.9913, "step": 4382 }, { "epoch": 0.33788159111933397, "grad_norm": 3.703672170639038, "learning_rate": 7.714079544156602e-06, "loss": 0.9763, "step": 4383 }, { "epoch": 0.3379586802343509, "grad_norm": 3.6405487060546875, "learning_rate": 7.71303094901166e-06, "loss": 0.8953, "step": 4384 }, { "epoch": 0.33803576934936785, "grad_norm": 3.752634286880493, "learning_rate": 7.711982184723287e-06, "loss": 0.8807, "step": 4385 }, { "epoch": 0.3381128584643848, "grad_norm": 3.402266025543213, "learning_rate": 7.71093325135687e-06, "loss": 0.9654, "step": 4386 }, { "epoch": 0.3381899475794018, "grad_norm": 3.3396193981170654, "learning_rate": 7.709884148977808e-06, "loss": 0.8923, "step": 4387 }, { "epoch": 0.33826703669441877, "grad_norm": 4.080255031585693, "learning_rate": 7.708834877651504e-06, "loss": 0.9685, "step": 4388 }, { "epoch": 0.3383441258094357, "grad_norm": 3.752744436264038, "learning_rate": 7.70778543744337e-06, "loss": 1.0687, "step": 4389 }, { "epoch": 0.33842121492445265, "grad_norm": 4.078830718994141, "learning_rate": 7.706735828418844e-06, "loss": 1.1614, "step": 4390 }, { "epoch": 0.3384983040394696, "grad_norm": 3.583847999572754, "learning_rate": 7.705686050643354e-06, "loss": 1.0587, "step": 4391 }, { "epoch": 0.3385753931544866, "grad_norm": 3.779529094696045, "learning_rate": 7.704636104182353e-06, "loss": 1.0217, "step": 4392 }, { "epoch": 0.33865248226950356, "grad_norm": 3.4878971576690674, "learning_rate": 7.703585989101298e-06, "loss": 0.8588, "step": 4393 }, { "epoch": 0.3387295713845205, "grad_norm": 3.9853031635284424, "learning_rate": 7.70253570546566e-06, "loss": 1.0825, "step": 4394 }, { "epoch": 0.33880666049953745, "grad_norm": 3.818143606185913, "learning_rate": 7.701485253340917e-06, "loss": 1.0648, "step": 4395 }, { "epoch": 0.3388837496145544, "grad_norm": 3.480123281478882, "learning_rate": 7.70043463279256e-06, "loss": 0.9691, "step": 4396 }, { "epoch": 
0.3389608387295714, "grad_norm": 3.3591983318328857, "learning_rate": 7.69938384388609e-06, "loss": 0.8988, "step": 4397 }, { "epoch": 0.33903792784458836, "grad_norm": 3.428823232650757, "learning_rate": 7.698332886687016e-06, "loss": 0.8853, "step": 4398 }, { "epoch": 0.33911501695960533, "grad_norm": 3.8618509769439697, "learning_rate": 7.697281761260863e-06, "loss": 1.0317, "step": 4399 }, { "epoch": 0.33919210607462225, "grad_norm": 3.7116105556488037, "learning_rate": 7.696230467673163e-06, "loss": 1.0838, "step": 4400 }, { "epoch": 0.3392691951896392, "grad_norm": 3.5771796703338623, "learning_rate": 7.695179005989454e-06, "loss": 0.995, "step": 4401 }, { "epoch": 0.3393462843046562, "grad_norm": 3.8581273555755615, "learning_rate": 7.694127376275295e-06, "loss": 0.9682, "step": 4402 }, { "epoch": 0.33942337341967316, "grad_norm": 3.888493776321411, "learning_rate": 7.693075578596247e-06, "loss": 1.0419, "step": 4403 }, { "epoch": 0.33950046253469013, "grad_norm": 4.1317877769470215, "learning_rate": 7.692023613017884e-06, "loss": 1.0923, "step": 4404 }, { "epoch": 0.33957755164970704, "grad_norm": 3.4401021003723145, "learning_rate": 7.690971479605793e-06, "loss": 0.9184, "step": 4405 }, { "epoch": 0.339654640764724, "grad_norm": 3.6244630813598633, "learning_rate": 7.689919178425565e-06, "loss": 0.9051, "step": 4406 }, { "epoch": 0.339731729879741, "grad_norm": 3.5508549213409424, "learning_rate": 7.688866709542809e-06, "loss": 1.0023, "step": 4407 }, { "epoch": 0.33980881899475796, "grad_norm": 3.756072521209717, "learning_rate": 7.687814073023138e-06, "loss": 0.9766, "step": 4408 }, { "epoch": 0.3398859081097749, "grad_norm": 4.206667423248291, "learning_rate": 7.686761268932182e-06, "loss": 1.0179, "step": 4409 }, { "epoch": 0.33996299722479184, "grad_norm": 3.559318780899048, "learning_rate": 7.685708297335575e-06, "loss": 1.0625, "step": 4410 }, { "epoch": 0.3400400863398088, "grad_norm": 3.7180213928222656, "learning_rate": 7.684655158298963e-06, "loss": 1.0346, "step": 4411 }, { "epoch": 0.3401171754548258, "grad_norm": 3.8383474349975586, "learning_rate": 7.68360185188801e-06, "loss": 0.945, "step": 4412 }, { "epoch": 0.34019426456984275, "grad_norm": 3.727611780166626, "learning_rate": 7.682548378168379e-06, "loss": 1.0908, "step": 4413 }, { "epoch": 0.3402713536848597, "grad_norm": 3.4572296142578125, "learning_rate": 7.681494737205748e-06, "loss": 0.9101, "step": 4414 }, { "epoch": 0.34034844279987664, "grad_norm": 4.0167741775512695, "learning_rate": 7.680440929065811e-06, "loss": 1.0263, "step": 4415 }, { "epoch": 0.3404255319148936, "grad_norm": 4.128690719604492, "learning_rate": 7.679386953814262e-06, "loss": 1.1199, "step": 4416 }, { "epoch": 0.3405026210299106, "grad_norm": 3.2454535961151123, "learning_rate": 7.678332811516815e-06, "loss": 0.9708, "step": 4417 }, { "epoch": 0.34057971014492755, "grad_norm": 3.2851145267486572, "learning_rate": 7.677278502239187e-06, "loss": 0.8788, "step": 4418 }, { "epoch": 0.3406567992599445, "grad_norm": 3.677515983581543, "learning_rate": 7.676224026047112e-06, "loss": 0.9379, "step": 4419 }, { "epoch": 0.34073388837496144, "grad_norm": 3.384584426879883, "learning_rate": 7.67516938300633e-06, "loss": 0.8972, "step": 4420 }, { "epoch": 0.3408109774899784, "grad_norm": 3.8261232376098633, "learning_rate": 7.674114573182589e-06, "loss": 1.0569, "step": 4421 }, { "epoch": 0.3408880666049954, "grad_norm": 3.638108491897583, "learning_rate": 7.673059596641657e-06, "loss": 1.0784, "step": 4422 }, { "epoch": 0.34096515572001235, 
"grad_norm": 3.599555730819702, "learning_rate": 7.672004453449301e-06, "loss": 1.0344, "step": 4423 }, { "epoch": 0.3410422448350293, "grad_norm": 3.3416149616241455, "learning_rate": 7.670949143671306e-06, "loss": 0.8663, "step": 4424 }, { "epoch": 0.34111933395004623, "grad_norm": 3.894286632537842, "learning_rate": 7.669893667373466e-06, "loss": 0.9725, "step": 4425 }, { "epoch": 0.3411964230650632, "grad_norm": 3.740086078643799, "learning_rate": 7.668838024621585e-06, "loss": 0.9731, "step": 4426 }, { "epoch": 0.3412735121800802, "grad_norm": 3.730048179626465, "learning_rate": 7.667782215481474e-06, "loss": 0.912, "step": 4427 }, { "epoch": 0.34135060129509714, "grad_norm": 3.557861328125, "learning_rate": 7.66672624001896e-06, "loss": 0.9324, "step": 4428 }, { "epoch": 0.3414276904101141, "grad_norm": 3.767613649368286, "learning_rate": 7.665670098299874e-06, "loss": 0.8642, "step": 4429 }, { "epoch": 0.34150477952513103, "grad_norm": 4.100453853607178, "learning_rate": 7.664613790390065e-06, "loss": 1.0388, "step": 4430 }, { "epoch": 0.341581868640148, "grad_norm": 3.8830795288085938, "learning_rate": 7.663557316355386e-06, "loss": 1.005, "step": 4431 }, { "epoch": 0.34165895775516497, "grad_norm": 3.5792341232299805, "learning_rate": 7.662500676261703e-06, "loss": 0.954, "step": 4432 }, { "epoch": 0.34173604687018194, "grad_norm": 3.8511476516723633, "learning_rate": 7.661443870174892e-06, "loss": 1.0085, "step": 4433 }, { "epoch": 0.3418131359851989, "grad_norm": 3.9277069568634033, "learning_rate": 7.66038689816084e-06, "loss": 1.0202, "step": 4434 }, { "epoch": 0.3418902251002158, "grad_norm": 3.5222573280334473, "learning_rate": 7.659329760285443e-06, "loss": 0.9149, "step": 4435 }, { "epoch": 0.3419673142152328, "grad_norm": 3.536393404006958, "learning_rate": 7.658272456614609e-06, "loss": 0.9427, "step": 4436 }, { "epoch": 0.34204440333024977, "grad_norm": 4.047040939331055, "learning_rate": 7.657214987214254e-06, "loss": 1.0167, "step": 4437 }, { "epoch": 0.34212149244526674, "grad_norm": 3.8045451641082764, "learning_rate": 7.65615735215031e-06, "loss": 1.0209, "step": 4438 }, { "epoch": 0.3421985815602837, "grad_norm": 3.7481818199157715, "learning_rate": 7.655099551488708e-06, "loss": 1.0584, "step": 4439 }, { "epoch": 0.3422756706753006, "grad_norm": 3.487293004989624, "learning_rate": 7.6540415852954e-06, "loss": 0.9386, "step": 4440 }, { "epoch": 0.3423527597903176, "grad_norm": 3.341665267944336, "learning_rate": 7.652983453636346e-06, "loss": 0.935, "step": 4441 }, { "epoch": 0.34242984890533457, "grad_norm": 3.843733310699463, "learning_rate": 7.651925156577514e-06, "loss": 1.0647, "step": 4442 }, { "epoch": 0.34250693802035154, "grad_norm": 3.3451244831085205, "learning_rate": 7.650866694184883e-06, "loss": 1.0161, "step": 4443 }, { "epoch": 0.3425840271353685, "grad_norm": 3.6213300228118896, "learning_rate": 7.649808066524442e-06, "loss": 0.994, "step": 4444 }, { "epoch": 0.3426611162503854, "grad_norm": 4.26886510848999, "learning_rate": 7.648749273662194e-06, "loss": 0.8898, "step": 4445 }, { "epoch": 0.3427382053654024, "grad_norm": 3.5714519023895264, "learning_rate": 7.647690315664145e-06, "loss": 1.028, "step": 4446 }, { "epoch": 0.34281529448041936, "grad_norm": 4.256858825683594, "learning_rate": 7.646631192596317e-06, "loss": 0.9492, "step": 4447 }, { "epoch": 0.34289238359543633, "grad_norm": 3.999039649963379, "learning_rate": 7.645571904524745e-06, "loss": 1.0845, "step": 4448 }, { "epoch": 0.3429694727104533, "grad_norm": 3.7288503646850586, 
"learning_rate": 7.644512451515462e-06, "loss": 0.9635, "step": 4449 }, { "epoch": 0.3430465618254702, "grad_norm": 3.4463119506835938, "learning_rate": 7.643452833634527e-06, "loss": 0.9414, "step": 4450 }, { "epoch": 0.3431236509404872, "grad_norm": 3.423847198486328, "learning_rate": 7.642393050947997e-06, "loss": 0.9491, "step": 4451 }, { "epoch": 0.34320074005550416, "grad_norm": 4.031202793121338, "learning_rate": 7.641333103521945e-06, "loss": 1.1798, "step": 4452 }, { "epoch": 0.34327782917052113, "grad_norm": 4.064019203186035, "learning_rate": 7.640272991422456e-06, "loss": 1.1341, "step": 4453 }, { "epoch": 0.3433549182855381, "grad_norm": 3.5513041019439697, "learning_rate": 7.63921271471562e-06, "loss": 1.0552, "step": 4454 }, { "epoch": 0.343432007400555, "grad_norm": 3.514437437057495, "learning_rate": 7.638152273467538e-06, "loss": 1.086, "step": 4455 }, { "epoch": 0.343509096515572, "grad_norm": 3.334420680999756, "learning_rate": 7.637091667744326e-06, "loss": 0.8893, "step": 4456 }, { "epoch": 0.34358618563058896, "grad_norm": 3.7241454124450684, "learning_rate": 7.636030897612108e-06, "loss": 0.9269, "step": 4457 }, { "epoch": 0.34366327474560593, "grad_norm": 4.0841755867004395, "learning_rate": 7.634969963137015e-06, "loss": 0.9552, "step": 4458 }, { "epoch": 0.3437403638606229, "grad_norm": 3.496382236480713, "learning_rate": 7.633908864385191e-06, "loss": 0.8975, "step": 4459 }, { "epoch": 0.3438174529756398, "grad_norm": 3.9809958934783936, "learning_rate": 7.63284760142279e-06, "loss": 1.0904, "step": 4460 }, { "epoch": 0.3438945420906568, "grad_norm": 3.6975362300872803, "learning_rate": 7.631786174315978e-06, "loss": 0.9786, "step": 4461 }, { "epoch": 0.34397163120567376, "grad_norm": 4.022948265075684, "learning_rate": 7.630724583130929e-06, "loss": 0.9845, "step": 4462 }, { "epoch": 0.3440487203206907, "grad_norm": 3.6614363193511963, "learning_rate": 7.629662827933827e-06, "loss": 1.0014, "step": 4463 }, { "epoch": 0.3441258094357077, "grad_norm": 3.6465506553649902, "learning_rate": 7.628600908790867e-06, "loss": 0.97, "step": 4464 }, { "epoch": 0.3442028985507246, "grad_norm": 3.7315773963928223, "learning_rate": 7.627538825768253e-06, "loss": 0.9663, "step": 4465 }, { "epoch": 0.3442799876657416, "grad_norm": 3.598884344100952, "learning_rate": 7.626476578932202e-06, "loss": 0.9673, "step": 4466 }, { "epoch": 0.34435707678075855, "grad_norm": 3.5586609840393066, "learning_rate": 7.625414168348939e-06, "loss": 1.0037, "step": 4467 }, { "epoch": 0.3444341658957755, "grad_norm": 3.392164468765259, "learning_rate": 7.6243515940847005e-06, "loss": 1.0022, "step": 4468 }, { "epoch": 0.3445112550107925, "grad_norm": 3.7672579288482666, "learning_rate": 7.623288856205733e-06, "loss": 0.9914, "step": 4469 }, { "epoch": 0.3445883441258094, "grad_norm": 3.6310787200927734, "learning_rate": 7.62222595477829e-06, "loss": 0.9816, "step": 4470 }, { "epoch": 0.3446654332408264, "grad_norm": 3.9517312049865723, "learning_rate": 7.6211628898686386e-06, "loss": 1.0428, "step": 4471 }, { "epoch": 0.34474252235584335, "grad_norm": 3.518615245819092, "learning_rate": 7.620099661543059e-06, "loss": 0.8673, "step": 4472 }, { "epoch": 0.3448196114708603, "grad_norm": 3.833643674850464, "learning_rate": 7.619036269867835e-06, "loss": 0.9825, "step": 4473 }, { "epoch": 0.3448967005858773, "grad_norm": 3.776273012161255, "learning_rate": 7.617972714909263e-06, "loss": 0.9213, "step": 4474 }, { "epoch": 0.3449737897008942, "grad_norm": 4.035387992858887, "learning_rate": 
7.616908996733651e-06, "loss": 0.9992, "step": 4475 }, { "epoch": 0.3450508788159112, "grad_norm": 3.4117751121520996, "learning_rate": 7.615845115407316e-06, "loss": 0.9576, "step": 4476 }, { "epoch": 0.34512796793092815, "grad_norm": 3.680464267730713, "learning_rate": 7.6147810709965864e-06, "loss": 0.952, "step": 4477 }, { "epoch": 0.3452050570459451, "grad_norm": 3.7311110496520996, "learning_rate": 7.6137168635677996e-06, "loss": 1.094, "step": 4478 }, { "epoch": 0.3452821461609621, "grad_norm": 3.6351912021636963, "learning_rate": 7.612652493187302e-06, "loss": 1.0033, "step": 4479 }, { "epoch": 0.345359235275979, "grad_norm": 3.4685473442077637, "learning_rate": 7.611587959921453e-06, "loss": 1.0091, "step": 4480 }, { "epoch": 0.345436324390996, "grad_norm": 3.7951955795288086, "learning_rate": 7.6105232638366225e-06, "loss": 0.9334, "step": 4481 }, { "epoch": 0.34551341350601295, "grad_norm": 3.6663379669189453, "learning_rate": 7.609458404999186e-06, "loss": 0.998, "step": 4482 }, { "epoch": 0.3455905026210299, "grad_norm": 3.7089757919311523, "learning_rate": 7.608393383475532e-06, "loss": 0.9697, "step": 4483 }, { "epoch": 0.3456675917360469, "grad_norm": 3.706681489944458, "learning_rate": 7.607328199332059e-06, "loss": 0.9436, "step": 4484 }, { "epoch": 0.34574468085106386, "grad_norm": 3.7007081508636475, "learning_rate": 7.606262852635178e-06, "loss": 1.0404, "step": 4485 }, { "epoch": 0.34582176996608077, "grad_norm": 3.912898063659668, "learning_rate": 7.605197343451305e-06, "loss": 1.0742, "step": 4486 }, { "epoch": 0.34589885908109774, "grad_norm": 3.450777292251587, "learning_rate": 7.604131671846872e-06, "loss": 0.9221, "step": 4487 }, { "epoch": 0.3459759481961147, "grad_norm": 3.4001710414886475, "learning_rate": 7.6030658378883145e-06, "loss": 0.9292, "step": 4488 }, { "epoch": 0.3460530373111317, "grad_norm": 3.4011619091033936, "learning_rate": 7.601999841642085e-06, "loss": 0.9305, "step": 4489 }, { "epoch": 0.34613012642614865, "grad_norm": 3.7454307079315186, "learning_rate": 7.60093368317464e-06, "loss": 1.0018, "step": 4490 }, { "epoch": 0.34620721554116557, "grad_norm": 3.7179946899414062, "learning_rate": 7.599867362552451e-06, "loss": 0.9987, "step": 4491 }, { "epoch": 0.34628430465618254, "grad_norm": 3.8563878536224365, "learning_rate": 7.598800879841998e-06, "loss": 1.0384, "step": 4492 }, { "epoch": 0.3463613937711995, "grad_norm": 3.63057804107666, "learning_rate": 7.5977342351097694e-06, "loss": 1.0281, "step": 4493 }, { "epoch": 0.3464384828862165, "grad_norm": 3.7689592838287354, "learning_rate": 7.596667428422264e-06, "loss": 1.0204, "step": 4494 }, { "epoch": 0.34651557200123345, "grad_norm": 4.023124694824219, "learning_rate": 7.595600459845994e-06, "loss": 1.1758, "step": 4495 }, { "epoch": 0.34659266111625037, "grad_norm": 3.7010927200317383, "learning_rate": 7.594533329447479e-06, "loss": 1.0144, "step": 4496 }, { "epoch": 0.34666975023126734, "grad_norm": 3.7244632244110107, "learning_rate": 7.593466037293247e-06, "loss": 0.9995, "step": 4497 }, { "epoch": 0.3467468393462843, "grad_norm": 3.411987781524658, "learning_rate": 7.5923985834498405e-06, "loss": 0.943, "step": 4498 }, { "epoch": 0.3468239284613013, "grad_norm": 3.9309608936309814, "learning_rate": 7.5913309679838074e-06, "loss": 0.9554, "step": 4499 }, { "epoch": 0.34690101757631825, "grad_norm": 3.5526673793792725, "learning_rate": 7.590263190961711e-06, "loss": 0.9885, "step": 4500 }, { "epoch": 0.34697810669133516, "grad_norm": 3.6589465141296387, "learning_rate": 
7.589195252450118e-06, "loss": 1.0778, "step": 4501 }, { "epoch": 0.34705519580635213, "grad_norm": 3.7149205207824707, "learning_rate": 7.588127152515611e-06, "loss": 1.0306, "step": 4502 }, { "epoch": 0.3471322849213691, "grad_norm": 4.081293106079102, "learning_rate": 7.587058891224781e-06, "loss": 0.9663, "step": 4503 }, { "epoch": 0.3472093740363861, "grad_norm": 3.5740342140197754, "learning_rate": 7.585990468644229e-06, "loss": 1.0298, "step": 4504 }, { "epoch": 0.34728646315140305, "grad_norm": 3.4450860023498535, "learning_rate": 7.584921884840563e-06, "loss": 0.9135, "step": 4505 }, { "epoch": 0.34736355226641996, "grad_norm": 3.9143495559692383, "learning_rate": 7.583853139880406e-06, "loss": 1.092, "step": 4506 }, { "epoch": 0.34744064138143693, "grad_norm": 3.25154447555542, "learning_rate": 7.5827842338303866e-06, "loss": 0.9825, "step": 4507 }, { "epoch": 0.3475177304964539, "grad_norm": 3.853174924850464, "learning_rate": 7.581715166757147e-06, "loss": 0.9713, "step": 4508 }, { "epoch": 0.3475948196114709, "grad_norm": 3.6071982383728027, "learning_rate": 7.58064593872734e-06, "loss": 0.9837, "step": 4509 }, { "epoch": 0.34767190872648784, "grad_norm": 3.3574037551879883, "learning_rate": 7.579576549807621e-06, "loss": 0.8727, "step": 4510 }, { "epoch": 0.34774899784150476, "grad_norm": 3.408640146255493, "learning_rate": 7.578507000064668e-06, "loss": 0.9032, "step": 4511 }, { "epoch": 0.34782608695652173, "grad_norm": 3.5927014350891113, "learning_rate": 7.5774372895651545e-06, "loss": 1.0204, "step": 4512 }, { "epoch": 0.3479031760715387, "grad_norm": 4.023251533508301, "learning_rate": 7.576367418375776e-06, "loss": 1.0703, "step": 4513 }, { "epoch": 0.34798026518655567, "grad_norm": 3.498119831085205, "learning_rate": 7.575297386563232e-06, "loss": 1.0155, "step": 4514 }, { "epoch": 0.34805735430157264, "grad_norm": 4.321279525756836, "learning_rate": 7.574227194194234e-06, "loss": 1.0152, "step": 4515 }, { "epoch": 0.34813444341658956, "grad_norm": 3.662602186203003, "learning_rate": 7.573156841335503e-06, "loss": 1.0795, "step": 4516 }, { "epoch": 0.3482115325316065, "grad_norm": 3.4204254150390625, "learning_rate": 7.572086328053769e-06, "loss": 0.9286, "step": 4517 }, { "epoch": 0.3482886216466235, "grad_norm": 4.003498554229736, "learning_rate": 7.571015654415774e-06, "loss": 0.991, "step": 4518 }, { "epoch": 0.34836571076164047, "grad_norm": 3.632580518722534, "learning_rate": 7.5699448204882684e-06, "loss": 0.9635, "step": 4519 }, { "epoch": 0.34844279987665744, "grad_norm": 3.763371706008911, "learning_rate": 7.568873826338015e-06, "loss": 1.0865, "step": 4520 }, { "epoch": 0.34851988899167435, "grad_norm": 3.367755889892578, "learning_rate": 7.567802672031781e-06, "loss": 0.9446, "step": 4521 }, { "epoch": 0.3485969781066913, "grad_norm": 3.568023204803467, "learning_rate": 7.56673135763635e-06, "loss": 0.9173, "step": 4522 }, { "epoch": 0.3486740672217083, "grad_norm": 3.6380043029785156, "learning_rate": 7.565659883218512e-06, "loss": 0.9864, "step": 4523 }, { "epoch": 0.34875115633672527, "grad_norm": 3.404252290725708, "learning_rate": 7.56458824884507e-06, "loss": 0.9846, "step": 4524 }, { "epoch": 0.34882824545174224, "grad_norm": 3.7746942043304443, "learning_rate": 7.563516454582831e-06, "loss": 0.9901, "step": 4525 }, { "epoch": 0.34890533456675915, "grad_norm": 3.6926469802856445, "learning_rate": 7.562444500498618e-06, "loss": 1.0837, "step": 4526 }, { "epoch": 0.3489824236817761, "grad_norm": 3.7840046882629395, "learning_rate": 
7.561372386659262e-06, "loss": 1.1675, "step": 4527 }, { "epoch": 0.3490595127967931, "grad_norm": 4.055497646331787, "learning_rate": 7.560300113131604e-06, "loss": 1.0338, "step": 4528 }, { "epoch": 0.34913660191181006, "grad_norm": 4.383268356323242, "learning_rate": 7.559227679982493e-06, "loss": 1.0946, "step": 4529 }, { "epoch": 0.34921369102682703, "grad_norm": 3.687899351119995, "learning_rate": 7.558155087278791e-06, "loss": 0.8959, "step": 4530 }, { "epoch": 0.34929078014184395, "grad_norm": 4.111197471618652, "learning_rate": 7.557082335087369e-06, "loss": 0.9816, "step": 4531 }, { "epoch": 0.3493678692568609, "grad_norm": 3.7356393337249756, "learning_rate": 7.556009423475106e-06, "loss": 0.9332, "step": 4532 }, { "epoch": 0.3494449583718779, "grad_norm": 3.6677143573760986, "learning_rate": 7.554936352508895e-06, "loss": 1.0524, "step": 4533 }, { "epoch": 0.34952204748689486, "grad_norm": 3.7901206016540527, "learning_rate": 7.5538631222556325e-06, "loss": 0.9411, "step": 4534 }, { "epoch": 0.34959913660191183, "grad_norm": 3.7148752212524414, "learning_rate": 7.552789732782233e-06, "loss": 1.0595, "step": 4535 }, { "epoch": 0.34967622571692875, "grad_norm": 3.53884220123291, "learning_rate": 7.551716184155614e-06, "loss": 0.969, "step": 4536 }, { "epoch": 0.3497533148319457, "grad_norm": 3.777893543243408, "learning_rate": 7.5506424764427065e-06, "loss": 1.011, "step": 4537 }, { "epoch": 0.3498304039469627, "grad_norm": 3.4374454021453857, "learning_rate": 7.549568609710451e-06, "loss": 0.9996, "step": 4538 }, { "epoch": 0.34990749306197966, "grad_norm": 3.2913565635681152, "learning_rate": 7.548494584025797e-06, "loss": 0.9579, "step": 4539 }, { "epoch": 0.34998458217699663, "grad_norm": 3.424445152282715, "learning_rate": 7.547420399455705e-06, "loss": 0.9822, "step": 4540 }, { "epoch": 0.35006167129201354, "grad_norm": 3.8123433589935303, "learning_rate": 7.5463460560671446e-06, "loss": 0.9697, "step": 4541 }, { "epoch": 0.3501387604070305, "grad_norm": 3.7088186740875244, "learning_rate": 7.5452715539270945e-06, "loss": 1.0171, "step": 4542 }, { "epoch": 0.3502158495220475, "grad_norm": 3.776679754257202, "learning_rate": 7.544196893102547e-06, "loss": 1.0093, "step": 4543 }, { "epoch": 0.35029293863706445, "grad_norm": 3.353478193283081, "learning_rate": 7.543122073660498e-06, "loss": 0.8819, "step": 4544 }, { "epoch": 0.3503700277520814, "grad_norm": 3.6976723670959473, "learning_rate": 7.542047095667959e-06, "loss": 1.0509, "step": 4545 }, { "epoch": 0.35044711686709834, "grad_norm": 4.122467517852783, "learning_rate": 7.540971959191952e-06, "loss": 1.1499, "step": 4546 }, { "epoch": 0.3505242059821153, "grad_norm": 3.71630859375, "learning_rate": 7.5398966642995e-06, "loss": 1.0094, "step": 4547 }, { "epoch": 0.3506012950971323, "grad_norm": 3.93642258644104, "learning_rate": 7.538821211057648e-06, "loss": 1.1285, "step": 4548 }, { "epoch": 0.35067838421214925, "grad_norm": 3.9508731365203857, "learning_rate": 7.537745599533442e-06, "loss": 1.2053, "step": 4549 }, { "epoch": 0.3507554733271662, "grad_norm": 4.088351726531982, "learning_rate": 7.536669829793939e-06, "loss": 0.9771, "step": 4550 }, { "epoch": 0.35083256244218314, "grad_norm": 3.7594566345214844, "learning_rate": 7.535593901906212e-06, "loss": 1.0617, "step": 4551 }, { "epoch": 0.3509096515572001, "grad_norm": 3.9711499214172363, "learning_rate": 7.534517815937336e-06, "loss": 1.0532, "step": 4552 }, { "epoch": 0.3509867406722171, "grad_norm": 4.007244110107422, "learning_rate": 7.533441571954401e-06, 
"loss": 1.0847, "step": 4553 }, { "epoch": 0.35106382978723405, "grad_norm": 3.750323534011841, "learning_rate": 7.532365170024507e-06, "loss": 0.951, "step": 4554 }, { "epoch": 0.351140918902251, "grad_norm": 3.2767701148986816, "learning_rate": 7.531288610214758e-06, "loss": 1.0074, "step": 4555 }, { "epoch": 0.35121800801726794, "grad_norm": 4.005361080169678, "learning_rate": 7.530211892592274e-06, "loss": 1.0625, "step": 4556 }, { "epoch": 0.3512950971322849, "grad_norm": 3.4741199016571045, "learning_rate": 7.529135017224185e-06, "loss": 0.9702, "step": 4557 }, { "epoch": 0.3513721862473019, "grad_norm": 4.308572292327881, "learning_rate": 7.528057984177624e-06, "loss": 1.1244, "step": 4558 }, { "epoch": 0.35144927536231885, "grad_norm": 3.521604299545288, "learning_rate": 7.526980793519742e-06, "loss": 0.9298, "step": 4559 }, { "epoch": 0.3515263644773358, "grad_norm": 3.766963243484497, "learning_rate": 7.525903445317695e-06, "loss": 0.9063, "step": 4560 }, { "epoch": 0.35160345359235273, "grad_norm": 3.509277582168579, "learning_rate": 7.524825939638649e-06, "loss": 0.9582, "step": 4561 }, { "epoch": 0.3516805427073697, "grad_norm": 3.2499780654907227, "learning_rate": 7.5237482765497835e-06, "loss": 0.9871, "step": 4562 }, { "epoch": 0.3517576318223867, "grad_norm": 3.8931937217712402, "learning_rate": 7.522670456118284e-06, "loss": 1.0293, "step": 4563 }, { "epoch": 0.35183472093740364, "grad_norm": 4.080838203430176, "learning_rate": 7.521592478411346e-06, "loss": 0.9787, "step": 4564 }, { "epoch": 0.3519118100524206, "grad_norm": 3.7860987186431885, "learning_rate": 7.520514343496177e-06, "loss": 1.0544, "step": 4565 }, { "epoch": 0.3519888991674376, "grad_norm": 3.832820415496826, "learning_rate": 7.519436051439991e-06, "loss": 1.045, "step": 4566 }, { "epoch": 0.3520659882824545, "grad_norm": 3.489471912384033, "learning_rate": 7.518357602310018e-06, "loss": 1.0061, "step": 4567 }, { "epoch": 0.35214307739747147, "grad_norm": 3.8605971336364746, "learning_rate": 7.517278996173489e-06, "loss": 1.1125, "step": 4568 }, { "epoch": 0.35222016651248844, "grad_norm": 3.234241247177124, "learning_rate": 7.516200233097655e-06, "loss": 0.9031, "step": 4569 }, { "epoch": 0.3522972556275054, "grad_norm": 3.4962003231048584, "learning_rate": 7.515121313149767e-06, "loss": 1.0268, "step": 4570 }, { "epoch": 0.3523743447425224, "grad_norm": 4.1234211921691895, "learning_rate": 7.51404223639709e-06, "loss": 1.0732, "step": 4571 }, { "epoch": 0.3524514338575393, "grad_norm": 3.3490140438079834, "learning_rate": 7.512963002906902e-06, "loss": 0.9798, "step": 4572 }, { "epoch": 0.35252852297255627, "grad_norm": 3.6099109649658203, "learning_rate": 7.5118836127464845e-06, "loss": 1.0534, "step": 4573 }, { "epoch": 0.35260561208757324, "grad_norm": 3.4530441761016846, "learning_rate": 7.5108040659831325e-06, "loss": 0.96, "step": 4574 }, { "epoch": 0.3526827012025902, "grad_norm": 3.9595425128936768, "learning_rate": 7.509724362684153e-06, "loss": 0.99, "step": 4575 }, { "epoch": 0.3527597903176072, "grad_norm": 3.436499834060669, "learning_rate": 7.508644502916857e-06, "loss": 1.0278, "step": 4576 }, { "epoch": 0.3528368794326241, "grad_norm": 3.768009901046753, "learning_rate": 7.507564486748567e-06, "loss": 0.9352, "step": 4577 }, { "epoch": 0.35291396854764107, "grad_norm": 3.8910608291625977, "learning_rate": 7.50648431424662e-06, "loss": 1.1054, "step": 4578 }, { "epoch": 0.35299105766265804, "grad_norm": 3.816119909286499, "learning_rate": 7.5054039854783565e-06, "loss": 1.0385, "step": 
4579 }, { "epoch": 0.353068146777675, "grad_norm": 3.278871774673462, "learning_rate": 7.5043235005111314e-06, "loss": 0.9679, "step": 4580 }, { "epoch": 0.353145235892692, "grad_norm": 3.655571222305298, "learning_rate": 7.503242859412306e-06, "loss": 1.0785, "step": 4581 }, { "epoch": 0.3532223250077089, "grad_norm": 3.78136944770813, "learning_rate": 7.502162062249252e-06, "loss": 0.9802, "step": 4582 }, { "epoch": 0.35329941412272586, "grad_norm": 3.6029317378997803, "learning_rate": 7.501081109089354e-06, "loss": 0.991, "step": 4583 }, { "epoch": 0.35337650323774283, "grad_norm": 3.5113279819488525, "learning_rate": 7.500000000000001e-06, "loss": 1.0691, "step": 4584 }, { "epoch": 0.3534535923527598, "grad_norm": 3.781364679336548, "learning_rate": 7.498918735048596e-06, "loss": 0.9593, "step": 4585 }, { "epoch": 0.3535306814677768, "grad_norm": 3.832432270050049, "learning_rate": 7.497837314302551e-06, "loss": 0.933, "step": 4586 }, { "epoch": 0.3536077705827937, "grad_norm": 3.3761022090911865, "learning_rate": 7.496755737829284e-06, "loss": 0.9601, "step": 4587 }, { "epoch": 0.35368485969781066, "grad_norm": 3.549546241760254, "learning_rate": 7.4956740056962294e-06, "loss": 0.9896, "step": 4588 }, { "epoch": 0.35376194881282763, "grad_norm": 4.057165622711182, "learning_rate": 7.494592117970826e-06, "loss": 1.0498, "step": 4589 }, { "epoch": 0.3538390379278446, "grad_norm": 3.55377197265625, "learning_rate": 7.493510074720523e-06, "loss": 0.9978, "step": 4590 }, { "epoch": 0.3539161270428616, "grad_norm": 3.4123051166534424, "learning_rate": 7.492427876012782e-06, "loss": 0.9557, "step": 4591 }, { "epoch": 0.3539932161578785, "grad_norm": 3.7144315242767334, "learning_rate": 7.491345521915071e-06, "loss": 0.9637, "step": 4592 }, { "epoch": 0.35407030527289546, "grad_norm": 3.7125496864318848, "learning_rate": 7.490263012494869e-06, "loss": 0.9757, "step": 4593 }, { "epoch": 0.35414739438791243, "grad_norm": 3.407020092010498, "learning_rate": 7.489180347819668e-06, "loss": 0.987, "step": 4594 }, { "epoch": 0.3542244835029294, "grad_norm": 3.3304386138916016, "learning_rate": 7.488097527956962e-06, "loss": 0.9076, "step": 4595 }, { "epoch": 0.35430157261794637, "grad_norm": 3.7230682373046875, "learning_rate": 7.487014552974263e-06, "loss": 1.0456, "step": 4596 }, { "epoch": 0.3543786617329633, "grad_norm": 3.5402743816375732, "learning_rate": 7.4859314229390856e-06, "loss": 1.0437, "step": 4597 }, { "epoch": 0.35445575084798026, "grad_norm": 3.747596025466919, "learning_rate": 7.484848137918958e-06, "loss": 1.0386, "step": 4598 }, { "epoch": 0.3545328399629972, "grad_norm": 3.624826192855835, "learning_rate": 7.483764697981422e-06, "loss": 0.9824, "step": 4599 }, { "epoch": 0.3546099290780142, "grad_norm": 3.802626609802246, "learning_rate": 7.482681103194018e-06, "loss": 1.0693, "step": 4600 }, { "epoch": 0.35468701819303117, "grad_norm": 3.6754977703094482, "learning_rate": 7.481597353624306e-06, "loss": 0.9418, "step": 4601 }, { "epoch": 0.3547641073080481, "grad_norm": 3.6683568954467773, "learning_rate": 7.480513449339851e-06, "loss": 1.0815, "step": 4602 }, { "epoch": 0.35484119642306505, "grad_norm": 3.4328901767730713, "learning_rate": 7.47942939040823e-06, "loss": 0.888, "step": 4603 }, { "epoch": 0.354918285538082, "grad_norm": 3.344658374786377, "learning_rate": 7.478345176897027e-06, "loss": 0.8972, "step": 4604 }, { "epoch": 0.354995374653099, "grad_norm": 3.514586925506592, "learning_rate": 7.4772608088738395e-06, "loss": 0.8603, "step": 4605 }, { "epoch": 
0.35507246376811596, "grad_norm": 3.7426164150238037, "learning_rate": 7.4761762864062694e-06, "loss": 0.8437, "step": 4606 }, { "epoch": 0.3551495528831329, "grad_norm": 4.099521636962891, "learning_rate": 7.475091609561933e-06, "loss": 0.9417, "step": 4607 }, { "epoch": 0.35522664199814985, "grad_norm": 4.204078674316406, "learning_rate": 7.4740067784084534e-06, "loss": 1.0172, "step": 4608 }, { "epoch": 0.3553037311131668, "grad_norm": 3.2971291542053223, "learning_rate": 7.472921793013463e-06, "loss": 0.9418, "step": 4609 }, { "epoch": 0.3553808202281838, "grad_norm": 3.2113230228424072, "learning_rate": 7.4718366534446085e-06, "loss": 0.9096, "step": 4610 }, { "epoch": 0.35545790934320076, "grad_norm": 3.6125385761260986, "learning_rate": 7.4707513597695396e-06, "loss": 0.8899, "step": 4611 }, { "epoch": 0.3555349984582177, "grad_norm": 3.8694043159484863, "learning_rate": 7.469665912055919e-06, "loss": 1.0594, "step": 4612 }, { "epoch": 0.35561208757323465, "grad_norm": 3.883565902709961, "learning_rate": 7.4685803103714204e-06, "loss": 0.8622, "step": 4613 }, { "epoch": 0.3556891766882516, "grad_norm": 3.652117967605591, "learning_rate": 7.4674945547837245e-06, "loss": 1.045, "step": 4614 }, { "epoch": 0.3557662658032686, "grad_norm": 3.6539828777313232, "learning_rate": 7.466408645360524e-06, "loss": 1.117, "step": 4615 }, { "epoch": 0.35584335491828556, "grad_norm": 3.4314699172973633, "learning_rate": 7.465322582169516e-06, "loss": 0.8358, "step": 4616 }, { "epoch": 0.3559204440333025, "grad_norm": 3.411808729171753, "learning_rate": 7.464236365278413e-06, "loss": 0.9704, "step": 4617 }, { "epoch": 0.35599753314831944, "grad_norm": 3.6066629886627197, "learning_rate": 7.463149994754938e-06, "loss": 0.9963, "step": 4618 }, { "epoch": 0.3560746222633364, "grad_norm": 3.517822742462158, "learning_rate": 7.462063470666816e-06, "loss": 0.9606, "step": 4619 }, { "epoch": 0.3561517113783534, "grad_norm": 3.6595687866210938, "learning_rate": 7.460976793081789e-06, "loss": 0.944, "step": 4620 }, { "epoch": 0.35622880049337036, "grad_norm": 3.9949941635131836, "learning_rate": 7.459889962067603e-06, "loss": 1.1163, "step": 4621 }, { "epoch": 0.35630588960838727, "grad_norm": 3.570941209793091, "learning_rate": 7.458802977692018e-06, "loss": 1.02, "step": 4622 }, { "epoch": 0.35638297872340424, "grad_norm": 3.210967540740967, "learning_rate": 7.4577158400228034e-06, "loss": 0.8364, "step": 4623 }, { "epoch": 0.3564600678384212, "grad_norm": 3.722440719604492, "learning_rate": 7.4566285491277334e-06, "loss": 0.9846, "step": 4624 }, { "epoch": 0.3565371569534382, "grad_norm": 3.4632368087768555, "learning_rate": 7.455541105074598e-06, "loss": 1.0375, "step": 4625 }, { "epoch": 0.35661424606845515, "grad_norm": 3.4531891345977783, "learning_rate": 7.454453507931192e-06, "loss": 0.8937, "step": 4626 }, { "epoch": 0.35669133518347207, "grad_norm": 3.6453826427459717, "learning_rate": 7.45336575776532e-06, "loss": 0.9928, "step": 4627 }, { "epoch": 0.35676842429848904, "grad_norm": 3.3601088523864746, "learning_rate": 7.452277854644801e-06, "loss": 0.8869, "step": 4628 }, { "epoch": 0.356845513413506, "grad_norm": 3.36553955078125, "learning_rate": 7.45118979863746e-06, "loss": 0.9932, "step": 4629 }, { "epoch": 0.356922602528523, "grad_norm": 3.6436736583709717, "learning_rate": 7.450101589811127e-06, "loss": 1.0522, "step": 4630 }, { "epoch": 0.35699969164353995, "grad_norm": 3.701080083847046, "learning_rate": 7.449013228233651e-06, "loss": 0.7591, "step": 4631 }, { "epoch": 
0.35707678075855687, "grad_norm": 3.6947665214538574, "learning_rate": 7.447924713972883e-06, "loss": 0.9894, "step": 4632 }, { "epoch": 0.35715386987357384, "grad_norm": 3.8331315517425537, "learning_rate": 7.4468360470966875e-06, "loss": 0.9405, "step": 4633 }, { "epoch": 0.3572309589885908, "grad_norm": 3.559194803237915, "learning_rate": 7.445747227672937e-06, "loss": 1.0249, "step": 4634 }, { "epoch": 0.3573080481036078, "grad_norm": 4.069003582000732, "learning_rate": 7.444658255769513e-06, "loss": 1.0037, "step": 4635 }, { "epoch": 0.35738513721862475, "grad_norm": 3.7197611331939697, "learning_rate": 7.4435691314543105e-06, "loss": 1.0408, "step": 4636 }, { "epoch": 0.35746222633364166, "grad_norm": 3.3894405364990234, "learning_rate": 7.442479854795226e-06, "loss": 1.0021, "step": 4637 }, { "epoch": 0.35753931544865863, "grad_norm": 3.269364833831787, "learning_rate": 7.441390425860172e-06, "loss": 0.9195, "step": 4638 }, { "epoch": 0.3576164045636756, "grad_norm": 3.6949777603149414, "learning_rate": 7.440300844717071e-06, "loss": 1.111, "step": 4639 }, { "epoch": 0.3576934936786926, "grad_norm": 3.708047389984131, "learning_rate": 7.43921111143385e-06, "loss": 1.0689, "step": 4640 }, { "epoch": 0.35777058279370955, "grad_norm": 3.682943105697632, "learning_rate": 7.438121226078449e-06, "loss": 1.041, "step": 4641 }, { "epoch": 0.35784767190872646, "grad_norm": 3.447502613067627, "learning_rate": 7.437031188718818e-06, "loss": 0.9712, "step": 4642 }, { "epoch": 0.35792476102374343, "grad_norm": 3.6242482662200928, "learning_rate": 7.435940999422913e-06, "loss": 0.9694, "step": 4643 }, { "epoch": 0.3580018501387604, "grad_norm": 3.254422903060913, "learning_rate": 7.434850658258704e-06, "loss": 0.8993, "step": 4644 }, { "epoch": 0.3580789392537774, "grad_norm": 3.3965585231781006, "learning_rate": 7.433760165294166e-06, "loss": 0.9396, "step": 4645 }, { "epoch": 0.35815602836879434, "grad_norm": 3.665678024291992, "learning_rate": 7.4326695205972865e-06, "loss": 1.008, "step": 4646 }, { "epoch": 0.35823311748381126, "grad_norm": 3.5869436264038086, "learning_rate": 7.431578724236062e-06, "loss": 0.965, "step": 4647 }, { "epoch": 0.35831020659882823, "grad_norm": 3.809087038040161, "learning_rate": 7.430487776278497e-06, "loss": 0.9678, "step": 4648 }, { "epoch": 0.3583872957138452, "grad_norm": 3.8319554328918457, "learning_rate": 7.429396676792607e-06, "loss": 1.0688, "step": 4649 }, { "epoch": 0.35846438482886217, "grad_norm": 3.6857879161834717, "learning_rate": 7.428305425846416e-06, "loss": 1.0452, "step": 4650 }, { "epoch": 0.35854147394387914, "grad_norm": 3.5876760482788086, "learning_rate": 7.427214023507958e-06, "loss": 1.0092, "step": 4651 }, { "epoch": 0.3586185630588961, "grad_norm": 3.979463815689087, "learning_rate": 7.426122469845277e-06, "loss": 1.1185, "step": 4652 }, { "epoch": 0.358695652173913, "grad_norm": 3.456955909729004, "learning_rate": 7.4250307649264265e-06, "loss": 0.9516, "step": 4653 }, { "epoch": 0.35877274128893, "grad_norm": 3.4941182136535645, "learning_rate": 7.423938908819466e-06, "loss": 0.945, "step": 4654 }, { "epoch": 0.35884983040394697, "grad_norm": 3.835120677947998, "learning_rate": 7.4228469015924675e-06, "loss": 0.9512, "step": 4655 }, { "epoch": 0.35892691951896394, "grad_norm": 3.682276964187622, "learning_rate": 7.421754743313514e-06, "loss": 1.0016, "step": 4656 }, { "epoch": 0.3590040086339809, "grad_norm": 3.423520088195801, "learning_rate": 7.420662434050695e-06, "loss": 0.8797, "step": 4657 }, { "epoch": 0.3590810977489978, 
"grad_norm": 3.420644998550415, "learning_rate": 7.41956997387211e-06, "loss": 0.8758, "step": 4658 }, { "epoch": 0.3591581868640148, "grad_norm": 3.3552937507629395, "learning_rate": 7.418477362845868e-06, "loss": 0.9692, "step": 4659 }, { "epoch": 0.35923527597903177, "grad_norm": 3.6283655166625977, "learning_rate": 7.41738460104009e-06, "loss": 1.0536, "step": 4660 }, { "epoch": 0.35931236509404874, "grad_norm": 3.8392016887664795, "learning_rate": 7.4162916885229e-06, "loss": 0.9439, "step": 4661 }, { "epoch": 0.3593894542090657, "grad_norm": 3.771641492843628, "learning_rate": 7.415198625362437e-06, "loss": 1.0178, "step": 4662 }, { "epoch": 0.3594665433240826, "grad_norm": 3.600102424621582, "learning_rate": 7.414105411626851e-06, "loss": 1.0072, "step": 4663 }, { "epoch": 0.3595436324390996, "grad_norm": 3.391453742980957, "learning_rate": 7.413012047384292e-06, "loss": 0.9686, "step": 4664 }, { "epoch": 0.35962072155411656, "grad_norm": 3.72904634475708, "learning_rate": 7.411918532702932e-06, "loss": 1.0089, "step": 4665 }, { "epoch": 0.35969781066913353, "grad_norm": 3.5617527961730957, "learning_rate": 7.410824867650943e-06, "loss": 0.9439, "step": 4666 }, { "epoch": 0.3597748997841505, "grad_norm": 3.4571316242218018, "learning_rate": 7.409731052296508e-06, "loss": 0.9568, "step": 4667 }, { "epoch": 0.3598519888991674, "grad_norm": 3.765029191970825, "learning_rate": 7.408637086707824e-06, "loss": 1.071, "step": 4668 }, { "epoch": 0.3599290780141844, "grad_norm": 3.57428240776062, "learning_rate": 7.4075429709530896e-06, "loss": 1.0565, "step": 4669 }, { "epoch": 0.36000616712920136, "grad_norm": 3.6238112449645996, "learning_rate": 7.4064487051005215e-06, "loss": 1.02, "step": 4670 }, { "epoch": 0.36008325624421833, "grad_norm": 3.5156960487365723, "learning_rate": 7.405354289218341e-06, "loss": 0.9572, "step": 4671 }, { "epoch": 0.3601603453592353, "grad_norm": 3.847799062728882, "learning_rate": 7.404259723374778e-06, "loss": 0.8973, "step": 4672 }, { "epoch": 0.3602374344742522, "grad_norm": 3.390068531036377, "learning_rate": 7.403165007638071e-06, "loss": 0.9345, "step": 4673 }, { "epoch": 0.3603145235892692, "grad_norm": 4.004146575927734, "learning_rate": 7.402070142076475e-06, "loss": 0.9548, "step": 4674 }, { "epoch": 0.36039161270428616, "grad_norm": 3.443192720413208, "learning_rate": 7.400975126758244e-06, "loss": 1.0265, "step": 4675 }, { "epoch": 0.36046870181930313, "grad_norm": 3.7502102851867676, "learning_rate": 7.39987996175165e-06, "loss": 0.9425, "step": 4676 }, { "epoch": 0.3605457909343201, "grad_norm": 3.6927671432495117, "learning_rate": 7.398784647124969e-06, "loss": 0.9693, "step": 4677 }, { "epoch": 0.360622880049337, "grad_norm": 3.445166826248169, "learning_rate": 7.397689182946489e-06, "loss": 1.0462, "step": 4678 }, { "epoch": 0.360699969164354, "grad_norm": 3.651033401489258, "learning_rate": 7.396593569284507e-06, "loss": 1.0167, "step": 4679 }, { "epoch": 0.36077705827937095, "grad_norm": 3.4636173248291016, "learning_rate": 7.395497806207328e-06, "loss": 0.9994, "step": 4680 }, { "epoch": 0.3608541473943879, "grad_norm": 3.4041786193847656, "learning_rate": 7.394401893783267e-06, "loss": 0.9804, "step": 4681 }, { "epoch": 0.3609312365094049, "grad_norm": 3.829814910888672, "learning_rate": 7.393305832080649e-06, "loss": 0.893, "step": 4682 }, { "epoch": 0.3610083256244218, "grad_norm": 3.543970823287964, "learning_rate": 7.392209621167808e-06, "loss": 0.9415, "step": 4683 }, { "epoch": 0.3610854147394388, "grad_norm": 3.526637315750122, 
"learning_rate": 7.3911132611130865e-06, "loss": 0.8818, "step": 4684 }, { "epoch": 0.36116250385445575, "grad_norm": 3.4459733963012695, "learning_rate": 7.390016751984837e-06, "loss": 0.9302, "step": 4685 }, { "epoch": 0.3612395929694727, "grad_norm": 3.7464380264282227, "learning_rate": 7.388920093851422e-06, "loss": 0.9382, "step": 4686 }, { "epoch": 0.3613166820844897, "grad_norm": 3.308462381362915, "learning_rate": 7.387823286781211e-06, "loss": 1.0056, "step": 4687 }, { "epoch": 0.3613937711995066, "grad_norm": 3.8184940814971924, "learning_rate": 7.386726330842584e-06, "loss": 1.075, "step": 4688 }, { "epoch": 0.3614708603145236, "grad_norm": 3.9437508583068848, "learning_rate": 7.385629226103932e-06, "loss": 1.0295, "step": 4689 }, { "epoch": 0.36154794942954055, "grad_norm": 3.679063558578491, "learning_rate": 7.384531972633654e-06, "loss": 1.067, "step": 4690 }, { "epoch": 0.3616250385445575, "grad_norm": 3.801813840866089, "learning_rate": 7.383434570500156e-06, "loss": 0.9291, "step": 4691 }, { "epoch": 0.3617021276595745, "grad_norm": 3.7192230224609375, "learning_rate": 7.382337019771859e-06, "loss": 1.0775, "step": 4692 }, { "epoch": 0.3617792167745914, "grad_norm": 3.7992069721221924, "learning_rate": 7.381239320517185e-06, "loss": 1.0711, "step": 4693 }, { "epoch": 0.3618563058896084, "grad_norm": 3.7021334171295166, "learning_rate": 7.380141472804572e-06, "loss": 0.9285, "step": 4694 }, { "epoch": 0.36193339500462535, "grad_norm": 3.7016396522521973, "learning_rate": 7.379043476702468e-06, "loss": 0.9494, "step": 4695 }, { "epoch": 0.3620104841196423, "grad_norm": 3.413544178009033, "learning_rate": 7.377945332279322e-06, "loss": 0.9474, "step": 4696 }, { "epoch": 0.3620875732346593, "grad_norm": 3.475022077560425, "learning_rate": 7.376847039603601e-06, "loss": 1.0284, "step": 4697 }, { "epoch": 0.3621646623496762, "grad_norm": 4.036199569702148, "learning_rate": 7.375748598743777e-06, "loss": 0.9613, "step": 4698 }, { "epoch": 0.3622417514646932, "grad_norm": 3.2614598274230957, "learning_rate": 7.374650009768332e-06, "loss": 0.9619, "step": 4699 }, { "epoch": 0.36231884057971014, "grad_norm": 3.5047171115875244, "learning_rate": 7.373551272745757e-06, "loss": 0.995, "step": 4700 }, { "epoch": 0.3623959296947271, "grad_norm": 3.3241288661956787, "learning_rate": 7.372452387744554e-06, "loss": 0.8698, "step": 4701 }, { "epoch": 0.3624730188097441, "grad_norm": 3.509653091430664, "learning_rate": 7.371353354833231e-06, "loss": 1.053, "step": 4702 }, { "epoch": 0.362550107924761, "grad_norm": 4.059185981750488, "learning_rate": 7.370254174080309e-06, "loss": 1.1282, "step": 4703 }, { "epoch": 0.36262719703977797, "grad_norm": 3.687779426574707, "learning_rate": 7.3691548455543146e-06, "loss": 0.9489, "step": 4704 }, { "epoch": 0.36270428615479494, "grad_norm": 3.582054615020752, "learning_rate": 7.368055369323787e-06, "loss": 0.9714, "step": 4705 }, { "epoch": 0.3627813752698119, "grad_norm": 3.6636664867401123, "learning_rate": 7.36695574545727e-06, "loss": 1.0117, "step": 4706 }, { "epoch": 0.3628584643848289, "grad_norm": 3.7531189918518066, "learning_rate": 7.365855974023321e-06, "loss": 0.9775, "step": 4707 }, { "epoch": 0.3629355534998458, "grad_norm": 3.9122326374053955, "learning_rate": 7.364756055090506e-06, "loss": 1.0191, "step": 4708 }, { "epoch": 0.36301264261486277, "grad_norm": 4.153857231140137, "learning_rate": 7.363655988727398e-06, "loss": 1.0468, "step": 4709 }, { "epoch": 0.36308973172987974, "grad_norm": 3.230003833770752, "learning_rate": 
7.36255577500258e-06, "loss": 0.8938, "step": 4710 }, { "epoch": 0.3631668208448967, "grad_norm": 3.6824822425842285, "learning_rate": 7.3614554139846475e-06, "loss": 0.9479, "step": 4711 }, { "epoch": 0.3632439099599137, "grad_norm": 3.7477197647094727, "learning_rate": 7.3603549057421975e-06, "loss": 1.0547, "step": 4712 }, { "epoch": 0.3633209990749306, "grad_norm": 3.6179938316345215, "learning_rate": 7.359254250343846e-06, "loss": 0.9269, "step": 4713 }, { "epoch": 0.36339808818994757, "grad_norm": 3.606153964996338, "learning_rate": 7.35815344785821e-06, "loss": 1.08, "step": 4714 }, { "epoch": 0.36347517730496454, "grad_norm": 3.634908676147461, "learning_rate": 7.35705249835392e-06, "loss": 0.8419, "step": 4715 }, { "epoch": 0.3635522664199815, "grad_norm": 3.726848602294922, "learning_rate": 7.355951401899614e-06, "loss": 1.0174, "step": 4716 }, { "epoch": 0.3636293555349985, "grad_norm": 3.769972562789917, "learning_rate": 7.35485015856394e-06, "loss": 0.9593, "step": 4717 }, { "epoch": 0.3637064446500154, "grad_norm": 4.418411731719971, "learning_rate": 7.353748768415554e-06, "loss": 1.0226, "step": 4718 }, { "epoch": 0.36378353376503236, "grad_norm": 3.3944032192230225, "learning_rate": 7.352647231523124e-06, "loss": 0.9723, "step": 4719 }, { "epoch": 0.36386062288004933, "grad_norm": 3.6218276023864746, "learning_rate": 7.351545547955321e-06, "loss": 1.0031, "step": 4720 }, { "epoch": 0.3639377119950663, "grad_norm": 3.672168016433716, "learning_rate": 7.350443717780834e-06, "loss": 0.9893, "step": 4721 }, { "epoch": 0.3640148011100833, "grad_norm": 3.8659307956695557, "learning_rate": 7.3493417410683545e-06, "loss": 0.9818, "step": 4722 }, { "epoch": 0.3640918902251002, "grad_norm": 3.814786672592163, "learning_rate": 7.3482396178865846e-06, "loss": 1.0346, "step": 4723 }, { "epoch": 0.36416897934011716, "grad_norm": 3.595031976699829, "learning_rate": 7.347137348304237e-06, "loss": 0.9958, "step": 4724 }, { "epoch": 0.36424606845513413, "grad_norm": 3.76802921295166, "learning_rate": 7.346034932390031e-06, "loss": 0.9374, "step": 4725 }, { "epoch": 0.3643231575701511, "grad_norm": 3.507568120956421, "learning_rate": 7.3449323702126985e-06, "loss": 0.9716, "step": 4726 }, { "epoch": 0.36440024668516807, "grad_norm": 3.8154513835906982, "learning_rate": 7.343829661840977e-06, "loss": 1.0059, "step": 4727 }, { "epoch": 0.364477335800185, "grad_norm": 3.319742202758789, "learning_rate": 7.342726807343615e-06, "loss": 0.9758, "step": 4728 }, { "epoch": 0.36455442491520196, "grad_norm": 3.301845073699951, "learning_rate": 7.341623806789371e-06, "loss": 0.9537, "step": 4729 }, { "epoch": 0.36463151403021893, "grad_norm": 3.977522611618042, "learning_rate": 7.340520660247008e-06, "loss": 1.0368, "step": 4730 }, { "epoch": 0.3647086031452359, "grad_norm": 3.8079488277435303, "learning_rate": 7.339417367785304e-06, "loss": 0.9951, "step": 4731 }, { "epoch": 0.36478569226025287, "grad_norm": 3.3151187896728516, "learning_rate": 7.338313929473046e-06, "loss": 1.0278, "step": 4732 }, { "epoch": 0.3648627813752698, "grad_norm": 3.971615791320801, "learning_rate": 7.337210345379022e-06, "loss": 0.9549, "step": 4733 }, { "epoch": 0.36493987049028676, "grad_norm": 3.8018195629119873, "learning_rate": 7.3361066155720385e-06, "loss": 1.0072, "step": 4734 }, { "epoch": 0.3650169596053037, "grad_norm": 3.6749868392944336, "learning_rate": 7.335002740120907e-06, "loss": 0.9764, "step": 4735 }, { "epoch": 0.3650940487203207, "grad_norm": 3.655430316925049, "learning_rate": 7.333898719094448e-06, 
"loss": 0.9881, "step": 4736 }, { "epoch": 0.36517113783533767, "grad_norm": 3.9375107288360596, "learning_rate": 7.3327945525614906e-06, "loss": 1.0533, "step": 4737 }, { "epoch": 0.36524822695035464, "grad_norm": 3.5556159019470215, "learning_rate": 7.3316902405908756e-06, "loss": 0.9244, "step": 4738 }, { "epoch": 0.36532531606537155, "grad_norm": 3.914153575897217, "learning_rate": 7.33058578325145e-06, "loss": 1.0044, "step": 4739 }, { "epoch": 0.3654024051803885, "grad_norm": 3.557025194168091, "learning_rate": 7.329481180612072e-06, "loss": 0.8395, "step": 4740 }, { "epoch": 0.3654794942954055, "grad_norm": 3.5067689418792725, "learning_rate": 7.328376432741605e-06, "loss": 1.0821, "step": 4741 }, { "epoch": 0.36555658341042246, "grad_norm": 3.890801191329956, "learning_rate": 7.327271539708927e-06, "loss": 1.0238, "step": 4742 }, { "epoch": 0.36563367252543943, "grad_norm": 4.020145893096924, "learning_rate": 7.326166501582922e-06, "loss": 0.9887, "step": 4743 }, { "epoch": 0.36571076164045635, "grad_norm": 3.579129219055176, "learning_rate": 7.325061318432482e-06, "loss": 1.0122, "step": 4744 }, { "epoch": 0.3657878507554733, "grad_norm": 3.599555730819702, "learning_rate": 7.323955990326514e-06, "loss": 1.0213, "step": 4745 }, { "epoch": 0.3658649398704903, "grad_norm": 3.7440710067749023, "learning_rate": 7.322850517333924e-06, "loss": 1.1071, "step": 4746 }, { "epoch": 0.36594202898550726, "grad_norm": 3.8629238605499268, "learning_rate": 7.321744899523634e-06, "loss": 1.0812, "step": 4747 }, { "epoch": 0.36601911810052423, "grad_norm": 3.8284130096435547, "learning_rate": 7.320639136964576e-06, "loss": 0.9624, "step": 4748 }, { "epoch": 0.36609620721554115, "grad_norm": 3.523813247680664, "learning_rate": 7.319533229725685e-06, "loss": 1.0628, "step": 4749 }, { "epoch": 0.3661732963305581, "grad_norm": 3.4013636112213135, "learning_rate": 7.31842717787591e-06, "loss": 0.9275, "step": 4750 }, { "epoch": 0.3662503854455751, "grad_norm": 3.6408772468566895, "learning_rate": 7.31732098148421e-06, "loss": 1.0767, "step": 4751 }, { "epoch": 0.36632747456059206, "grad_norm": 3.796874523162842, "learning_rate": 7.316214640619546e-06, "loss": 1.013, "step": 4752 }, { "epoch": 0.36640456367560903, "grad_norm": 3.552623748779297, "learning_rate": 7.3151081553508975e-06, "loss": 0.9438, "step": 4753 }, { "epoch": 0.36648165279062594, "grad_norm": 3.6614010334014893, "learning_rate": 7.314001525747244e-06, "loss": 0.981, "step": 4754 }, { "epoch": 0.3665587419056429, "grad_norm": 3.726024627685547, "learning_rate": 7.31289475187758e-06, "loss": 0.9823, "step": 4755 }, { "epoch": 0.3666358310206599, "grad_norm": 3.74556303024292, "learning_rate": 7.311787833810908e-06, "loss": 0.9622, "step": 4756 }, { "epoch": 0.36671292013567686, "grad_norm": 3.351370096206665, "learning_rate": 7.310680771616238e-06, "loss": 0.9525, "step": 4757 }, { "epoch": 0.3667900092506938, "grad_norm": 3.492908477783203, "learning_rate": 7.309573565362588e-06, "loss": 0.9845, "step": 4758 }, { "epoch": 0.36686709836571074, "grad_norm": 3.719702959060669, "learning_rate": 7.308466215118988e-06, "loss": 0.9873, "step": 4759 }, { "epoch": 0.3669441874807277, "grad_norm": 3.778932809829712, "learning_rate": 7.307358720954476e-06, "loss": 1.0009, "step": 4760 }, { "epoch": 0.3670212765957447, "grad_norm": 3.6195807456970215, "learning_rate": 7.306251082938096e-06, "loss": 0.8939, "step": 4761 }, { "epoch": 0.36709836571076165, "grad_norm": 3.6259400844573975, "learning_rate": 7.305143301138908e-06, "loss": 1.0111, 
"step": 4762 }, { "epoch": 0.3671754548257786, "grad_norm": 4.1105637550354, "learning_rate": 7.3040353756259726e-06, "loss": 1.0318, "step": 4763 }, { "epoch": 0.36725254394079554, "grad_norm": 4.061164379119873, "learning_rate": 7.302927306468365e-06, "loss": 0.9631, "step": 4764 }, { "epoch": 0.3673296330558125, "grad_norm": 3.501284122467041, "learning_rate": 7.301819093735165e-06, "loss": 1.0098, "step": 4765 }, { "epoch": 0.3674067221708295, "grad_norm": 3.7744534015655518, "learning_rate": 7.3007107374954665e-06, "loss": 1.1278, "step": 4766 }, { "epoch": 0.36748381128584645, "grad_norm": 3.3170313835144043, "learning_rate": 7.299602237818371e-06, "loss": 1.0022, "step": 4767 }, { "epoch": 0.3675609004008634, "grad_norm": 3.677799701690674, "learning_rate": 7.298493594772985e-06, "loss": 1.0452, "step": 4768 }, { "epoch": 0.36763798951588034, "grad_norm": 3.811732292175293, "learning_rate": 7.297384808428428e-06, "loss": 0.8941, "step": 4769 }, { "epoch": 0.3677150786308973, "grad_norm": 4.003880977630615, "learning_rate": 7.296275878853826e-06, "loss": 1.0617, "step": 4770 }, { "epoch": 0.3677921677459143, "grad_norm": 3.452606439590454, "learning_rate": 7.295166806118315e-06, "loss": 0.9734, "step": 4771 }, { "epoch": 0.36786925686093125, "grad_norm": 3.6096274852752686, "learning_rate": 7.294057590291043e-06, "loss": 0.8906, "step": 4772 }, { "epoch": 0.3679463459759482, "grad_norm": 3.7491250038146973, "learning_rate": 7.2929482314411596e-06, "loss": 0.959, "step": 4773 }, { "epoch": 0.36802343509096513, "grad_norm": 3.7471847534179688, "learning_rate": 7.291838729637829e-06, "loss": 1.1626, "step": 4774 }, { "epoch": 0.3681005242059821, "grad_norm": 3.6201562881469727, "learning_rate": 7.290729084950226e-06, "loss": 0.9828, "step": 4775 }, { "epoch": 0.3681776133209991, "grad_norm": 3.8552613258361816, "learning_rate": 7.289619297447525e-06, "loss": 0.9679, "step": 4776 }, { "epoch": 0.36825470243601605, "grad_norm": 3.5603456497192383, "learning_rate": 7.288509367198922e-06, "loss": 0.9275, "step": 4777 }, { "epoch": 0.368331791551033, "grad_norm": 3.6187384128570557, "learning_rate": 7.28739929427361e-06, "loss": 1.0267, "step": 4778 }, { "epoch": 0.36840888066604993, "grad_norm": 3.5130581855773926, "learning_rate": 7.2862890787408e-06, "loss": 0.9113, "step": 4779 }, { "epoch": 0.3684859697810669, "grad_norm": 3.5186641216278076, "learning_rate": 7.2851787206697075e-06, "loss": 0.9837, "step": 4780 }, { "epoch": 0.3685630588960839, "grad_norm": 3.7700576782226562, "learning_rate": 7.284068220129557e-06, "loss": 1.0265, "step": 4781 }, { "epoch": 0.36864014801110084, "grad_norm": 3.770547389984131, "learning_rate": 7.282957577189581e-06, "loss": 1.0115, "step": 4782 }, { "epoch": 0.3687172371261178, "grad_norm": 3.8225295543670654, "learning_rate": 7.281846791919025e-06, "loss": 1.1445, "step": 4783 }, { "epoch": 0.36879432624113473, "grad_norm": 3.4760961532592773, "learning_rate": 7.280735864387138e-06, "loss": 0.9826, "step": 4784 }, { "epoch": 0.3688714153561517, "grad_norm": 3.5458803176879883, "learning_rate": 7.279624794663184e-06, "loss": 0.9137, "step": 4785 }, { "epoch": 0.36894850447116867, "grad_norm": 3.6354055404663086, "learning_rate": 7.27851358281643e-06, "loss": 0.9363, "step": 4786 }, { "epoch": 0.36902559358618564, "grad_norm": 3.4249465465545654, "learning_rate": 7.277402228916155e-06, "loss": 0.9732, "step": 4787 }, { "epoch": 0.3691026827012026, "grad_norm": 4.003117561340332, "learning_rate": 7.2762907330316446e-06, "loss": 1.0536, "step": 4788 }, { 
"epoch": 0.3691797718162195, "grad_norm": 3.547496795654297, "learning_rate": 7.275179095232197e-06, "loss": 0.9676, "step": 4789 }, { "epoch": 0.3692568609312365, "grad_norm": 3.738219976425171, "learning_rate": 7.274067315587116e-06, "loss": 1.0024, "step": 4790 }, { "epoch": 0.36933395004625347, "grad_norm": 4.052086353302002, "learning_rate": 7.272955394165717e-06, "loss": 0.9979, "step": 4791 }, { "epoch": 0.36941103916127044, "grad_norm": 3.6811420917510986, "learning_rate": 7.27184333103732e-06, "loss": 1.0432, "step": 4792 }, { "epoch": 0.3694881282762874, "grad_norm": 3.4397454261779785, "learning_rate": 7.270731126271257e-06, "loss": 0.8792, "step": 4793 }, { "epoch": 0.3695652173913043, "grad_norm": 3.70704984664917, "learning_rate": 7.26961877993687e-06, "loss": 0.8806, "step": 4794 }, { "epoch": 0.3696423065063213, "grad_norm": 4.066430568695068, "learning_rate": 7.268506292103505e-06, "loss": 1.0428, "step": 4795 }, { "epoch": 0.36971939562133826, "grad_norm": 3.72969388961792, "learning_rate": 7.267393662840525e-06, "loss": 1.0417, "step": 4796 }, { "epoch": 0.36979648473635524, "grad_norm": 3.8739657402038574, "learning_rate": 7.26628089221729e-06, "loss": 1.0413, "step": 4797 }, { "epoch": 0.3698735738513722, "grad_norm": 3.4731998443603516, "learning_rate": 7.265167980303179e-06, "loss": 1.0326, "step": 4798 }, { "epoch": 0.3699506629663891, "grad_norm": 3.697890281677246, "learning_rate": 7.264054927167577e-06, "loss": 1.0505, "step": 4799 }, { "epoch": 0.3700277520814061, "grad_norm": 3.670499086380005, "learning_rate": 7.2629417328798755e-06, "loss": 0.9704, "step": 4800 }, { "epoch": 0.37010484119642306, "grad_norm": 3.542783260345459, "learning_rate": 7.2618283975094785e-06, "loss": 1.0082, "step": 4801 }, { "epoch": 0.37018193031144003, "grad_norm": 3.7526941299438477, "learning_rate": 7.260714921125795e-06, "loss": 1.05, "step": 4802 }, { "epoch": 0.370259019426457, "grad_norm": 3.547579050064087, "learning_rate": 7.259601303798243e-06, "loss": 0.9653, "step": 4803 }, { "epoch": 0.3703361085414739, "grad_norm": 3.573000192642212, "learning_rate": 7.258487545596254e-06, "loss": 0.9571, "step": 4804 }, { "epoch": 0.3704131976564909, "grad_norm": 3.745025873184204, "learning_rate": 7.257373646589262e-06, "loss": 1.0854, "step": 4805 }, { "epoch": 0.37049028677150786, "grad_norm": 3.414745330810547, "learning_rate": 7.256259606846715e-06, "loss": 0.9017, "step": 4806 }, { "epoch": 0.37056737588652483, "grad_norm": 3.3791048526763916, "learning_rate": 7.255145426438068e-06, "loss": 0.946, "step": 4807 }, { "epoch": 0.3706444650015418, "grad_norm": 3.603844165802002, "learning_rate": 7.254031105432781e-06, "loss": 0.956, "step": 4808 }, { "epoch": 0.3707215541165587, "grad_norm": 4.20005464553833, "learning_rate": 7.252916643900331e-06, "loss": 1.0829, "step": 4809 }, { "epoch": 0.3707986432315757, "grad_norm": 3.4412059783935547, "learning_rate": 7.251802041910194e-06, "loss": 1.0052, "step": 4810 }, { "epoch": 0.37087573234659266, "grad_norm": 3.931774616241455, "learning_rate": 7.250687299531864e-06, "loss": 1.0152, "step": 4811 }, { "epoch": 0.3709528214616096, "grad_norm": 3.7596309185028076, "learning_rate": 7.249572416834838e-06, "loss": 0.9358, "step": 4812 }, { "epoch": 0.3710299105766266, "grad_norm": 3.739588975906372, "learning_rate": 7.2484573938886215e-06, "loss": 0.9234, "step": 4813 }, { "epoch": 0.3711069996916435, "grad_norm": 3.4759035110473633, "learning_rate": 7.2473422307627304e-06, "loss": 1.0292, "step": 4814 }, { "epoch": 0.3711840888066605, 
"grad_norm": 3.456209659576416, "learning_rate": 7.246226927526693e-06, "loss": 1.0245, "step": 4815 }, { "epoch": 0.37126117792167745, "grad_norm": 3.7224795818328857, "learning_rate": 7.245111484250038e-06, "loss": 0.9233, "step": 4816 }, { "epoch": 0.3713382670366944, "grad_norm": 3.3582236766815186, "learning_rate": 7.243995901002312e-06, "loss": 1.0253, "step": 4817 }, { "epoch": 0.3714153561517114, "grad_norm": 3.8575472831726074, "learning_rate": 7.242880177853062e-06, "loss": 1.0134, "step": 4818 }, { "epoch": 0.3714924452667283, "grad_norm": 3.423126697540283, "learning_rate": 7.241764314871848e-06, "loss": 0.9355, "step": 4819 }, { "epoch": 0.3715695343817453, "grad_norm": 3.5420002937316895, "learning_rate": 7.240648312128242e-06, "loss": 0.9646, "step": 4820 }, { "epoch": 0.37164662349676225, "grad_norm": 3.214689254760742, "learning_rate": 7.239532169691817e-06, "loss": 0.9524, "step": 4821 }, { "epoch": 0.3717237126117792, "grad_norm": 3.500775098800659, "learning_rate": 7.23841588763216e-06, "loss": 1.0106, "step": 4822 }, { "epoch": 0.3718008017267962, "grad_norm": 3.302901029586792, "learning_rate": 7.237299466018866e-06, "loss": 0.8822, "step": 4823 }, { "epoch": 0.37187789084181316, "grad_norm": 3.5195157527923584, "learning_rate": 7.236182904921536e-06, "loss": 1.0378, "step": 4824 }, { "epoch": 0.3719549799568301, "grad_norm": 3.8534128665924072, "learning_rate": 7.2350662044097854e-06, "loss": 1.0471, "step": 4825 }, { "epoch": 0.37203206907184705, "grad_norm": 3.6526763439178467, "learning_rate": 7.233949364553232e-06, "loss": 1.0511, "step": 4826 }, { "epoch": 0.372109158186864, "grad_norm": 3.9592645168304443, "learning_rate": 7.2328323854215044e-06, "loss": 0.9967, "step": 4827 }, { "epoch": 0.372186247301881, "grad_norm": 3.569300889968872, "learning_rate": 7.231715267084243e-06, "loss": 0.9936, "step": 4828 }, { "epoch": 0.37226333641689796, "grad_norm": 3.6688168048858643, "learning_rate": 7.2305980096110925e-06, "loss": 1.1023, "step": 4829 }, { "epoch": 0.3723404255319149, "grad_norm": 3.4594991207122803, "learning_rate": 7.229480613071709e-06, "loss": 0.9457, "step": 4830 }, { "epoch": 0.37241751464693185, "grad_norm": 3.949270009994507, "learning_rate": 7.228363077535756e-06, "loss": 0.9467, "step": 4831 }, { "epoch": 0.3724946037619488, "grad_norm": 3.7275776863098145, "learning_rate": 7.227245403072905e-06, "loss": 0.909, "step": 4832 }, { "epoch": 0.3725716928769658, "grad_norm": 3.561305046081543, "learning_rate": 7.2261275897528405e-06, "loss": 1.1122, "step": 4833 }, { "epoch": 0.37264878199198276, "grad_norm": 3.612966299057007, "learning_rate": 7.225009637645248e-06, "loss": 1.0967, "step": 4834 }, { "epoch": 0.3727258711069997, "grad_norm": 3.454404354095459, "learning_rate": 7.223891546819829e-06, "loss": 1.0302, "step": 4835 }, { "epoch": 0.37280296022201664, "grad_norm": 3.8778316974639893, "learning_rate": 7.222773317346291e-06, "loss": 1.0213, "step": 4836 }, { "epoch": 0.3728800493370336, "grad_norm": 3.4762234687805176, "learning_rate": 7.221654949294348e-06, "loss": 0.9744, "step": 4837 }, { "epoch": 0.3729571384520506, "grad_norm": 3.4373879432678223, "learning_rate": 7.220536442733724e-06, "loss": 0.8598, "step": 4838 }, { "epoch": 0.37303422756706756, "grad_norm": 4.246344089508057, "learning_rate": 7.219417797734155e-06, "loss": 1.0942, "step": 4839 }, { "epoch": 0.37311131668208447, "grad_norm": 3.5066323280334473, "learning_rate": 7.2182990143653795e-06, "loss": 1.0291, "step": 4840 }, { "epoch": 0.37318840579710144, "grad_norm": 
3.613722562789917, "learning_rate": 7.217180092697152e-06, "loss": 0.9715, "step": 4841 }, { "epoch": 0.3732654949121184, "grad_norm": 3.9713635444641113, "learning_rate": 7.216061032799225e-06, "loss": 1.0856, "step": 4842 }, { "epoch": 0.3733425840271354, "grad_norm": 3.5957889556884766, "learning_rate": 7.2149418347413724e-06, "loss": 0.9641, "step": 4843 }, { "epoch": 0.37341967314215235, "grad_norm": 3.5281994342803955, "learning_rate": 7.213822498593368e-06, "loss": 1.0026, "step": 4844 }, { "epoch": 0.37349676225716927, "grad_norm": 4.489894390106201, "learning_rate": 7.212703024424995e-06, "loss": 1.061, "step": 4845 }, { "epoch": 0.37357385137218624, "grad_norm": 4.678754806518555, "learning_rate": 7.211583412306049e-06, "loss": 0.9971, "step": 4846 }, { "epoch": 0.3736509404872032, "grad_norm": 4.363256454467773, "learning_rate": 7.2104636623063315e-06, "loss": 1.0027, "step": 4847 }, { "epoch": 0.3737280296022202, "grad_norm": 3.5184109210968018, "learning_rate": 7.209343774495652e-06, "loss": 1.0354, "step": 4848 }, { "epoch": 0.37380511871723715, "grad_norm": 3.521500587463379, "learning_rate": 7.208223748943832e-06, "loss": 0.9084, "step": 4849 }, { "epoch": 0.37388220783225407, "grad_norm": 3.56219744682312, "learning_rate": 7.207103585720697e-06, "loss": 1.0089, "step": 4850 }, { "epoch": 0.37395929694727104, "grad_norm": 3.738111734390259, "learning_rate": 7.2059832848960855e-06, "loss": 1.0386, "step": 4851 }, { "epoch": 0.374036386062288, "grad_norm": 4.135499954223633, "learning_rate": 7.204862846539841e-06, "loss": 1.0668, "step": 4852 }, { "epoch": 0.374113475177305, "grad_norm": 3.822765827178955, "learning_rate": 7.2037422707218165e-06, "loss": 1.0167, "step": 4853 }, { "epoch": 0.37419056429232195, "grad_norm": 3.540203094482422, "learning_rate": 7.202621557511874e-06, "loss": 0.9056, "step": 4854 }, { "epoch": 0.37426765340733886, "grad_norm": 3.5464091300964355, "learning_rate": 7.201500706979886e-06, "loss": 0.9215, "step": 4855 }, { "epoch": 0.37434474252235583, "grad_norm": 3.7455790042877197, "learning_rate": 7.20037971919573e-06, "loss": 0.9164, "step": 4856 }, { "epoch": 0.3744218316373728, "grad_norm": 3.647853374481201, "learning_rate": 7.199258594229297e-06, "loss": 1.0188, "step": 4857 }, { "epoch": 0.3744989207523898, "grad_norm": 3.654245615005493, "learning_rate": 7.198137332150479e-06, "loss": 1.0136, "step": 4858 }, { "epoch": 0.37457600986740675, "grad_norm": 3.327587127685547, "learning_rate": 7.197015933029184e-06, "loss": 0.9465, "step": 4859 }, { "epoch": 0.37465309898242366, "grad_norm": 4.306604385375977, "learning_rate": 7.195894396935324e-06, "loss": 1.0239, "step": 4860 }, { "epoch": 0.37473018809744063, "grad_norm": 4.178218841552734, "learning_rate": 7.194772723938819e-06, "loss": 1.0016, "step": 4861 }, { "epoch": 0.3748072772124576, "grad_norm": 4.198592185974121, "learning_rate": 7.193650914109603e-06, "loss": 1.0389, "step": 4862 }, { "epoch": 0.37488436632747457, "grad_norm": 3.9807186126708984, "learning_rate": 7.192528967517615e-06, "loss": 0.9948, "step": 4863 }, { "epoch": 0.37496145544249154, "grad_norm": 3.558039426803589, "learning_rate": 7.1914068842328e-06, "loss": 1.0135, "step": 4864 }, { "epoch": 0.37503854455750846, "grad_norm": 3.5846774578094482, "learning_rate": 7.190284664325116e-06, "loss": 1.0469, "step": 4865 }, { "epoch": 0.37511563367252543, "grad_norm": 3.2694685459136963, "learning_rate": 7.189162307864525e-06, "loss": 0.9575, "step": 4866 }, { "epoch": 0.3751927227875424, "grad_norm": 3.3585803508758545, 
"learning_rate": 7.188039814921004e-06, "loss": 0.9909, "step": 4867 }, { "epoch": 0.37526981190255937, "grad_norm": 3.606834650039673, "learning_rate": 7.186917185564534e-06, "loss": 0.999, "step": 4868 }, { "epoch": 0.37534690101757634, "grad_norm": 3.2820441722869873, "learning_rate": 7.185794419865102e-06, "loss": 0.9831, "step": 4869 }, { "epoch": 0.37542399013259325, "grad_norm": 3.559875011444092, "learning_rate": 7.184671517892707e-06, "loss": 0.9819, "step": 4870 }, { "epoch": 0.3755010792476102, "grad_norm": 3.5334460735321045, "learning_rate": 7.183548479717361e-06, "loss": 1.031, "step": 4871 }, { "epoch": 0.3755781683626272, "grad_norm": 3.8719050884246826, "learning_rate": 7.1824253054090735e-06, "loss": 0.9976, "step": 4872 }, { "epoch": 0.37565525747764417, "grad_norm": 3.7610104084014893, "learning_rate": 7.1813019950378724e-06, "loss": 1.1075, "step": 4873 }, { "epoch": 0.37573234659266114, "grad_norm": 3.8099474906921387, "learning_rate": 7.1801785486737884e-06, "loss": 1.114, "step": 4874 }, { "epoch": 0.37580943570767805, "grad_norm": 3.353347063064575, "learning_rate": 7.1790549663868644e-06, "loss": 0.9258, "step": 4875 }, { "epoch": 0.375886524822695, "grad_norm": 3.220855236053467, "learning_rate": 7.1779312482471475e-06, "loss": 0.9835, "step": 4876 }, { "epoch": 0.375963613937712, "grad_norm": 3.72371244430542, "learning_rate": 7.176807394324697e-06, "loss": 1.0217, "step": 4877 }, { "epoch": 0.37604070305272896, "grad_norm": 3.4218595027923584, "learning_rate": 7.1756834046895815e-06, "loss": 0.9421, "step": 4878 }, { "epoch": 0.37611779216774593, "grad_norm": 3.7744340896606445, "learning_rate": 7.174559279411872e-06, "loss": 0.9703, "step": 4879 }, { "epoch": 0.37619488128276285, "grad_norm": 3.9162957668304443, "learning_rate": 7.173435018561654e-06, "loss": 1.1117, "step": 4880 }, { "epoch": 0.3762719703977798, "grad_norm": 3.7875723838806152, "learning_rate": 7.17231062220902e-06, "loss": 1.0843, "step": 4881 }, { "epoch": 0.3763490595127968, "grad_norm": 3.8338139057159424, "learning_rate": 7.171186090424069e-06, "loss": 1.0342, "step": 4882 }, { "epoch": 0.37642614862781376, "grad_norm": 3.3888375759124756, "learning_rate": 7.170061423276911e-06, "loss": 1.0047, "step": 4883 }, { "epoch": 0.37650323774283073, "grad_norm": 3.7450151443481445, "learning_rate": 7.168936620837661e-06, "loss": 0.9721, "step": 4884 }, { "epoch": 0.37658032685784765, "grad_norm": 3.4551503658294678, "learning_rate": 7.167811683176446e-06, "loss": 0.8549, "step": 4885 }, { "epoch": 0.3766574159728646, "grad_norm": 3.826219320297241, "learning_rate": 7.166686610363399e-06, "loss": 1.0317, "step": 4886 }, { "epoch": 0.3767345050878816, "grad_norm": 3.661606788635254, "learning_rate": 7.165561402468666e-06, "loss": 0.9179, "step": 4887 }, { "epoch": 0.37681159420289856, "grad_norm": 3.9215991497039795, "learning_rate": 7.164436059562393e-06, "loss": 1.1039, "step": 4888 }, { "epoch": 0.37688868331791553, "grad_norm": 3.983035087585449, "learning_rate": 7.163310581714744e-06, "loss": 0.9932, "step": 4889 }, { "epoch": 0.37696577243293244, "grad_norm": 3.3933942317962646, "learning_rate": 7.162184968995882e-06, "loss": 0.9746, "step": 4890 }, { "epoch": 0.3770428615479494, "grad_norm": 3.4548404216766357, "learning_rate": 7.161059221475985e-06, "loss": 0.8202, "step": 4891 }, { "epoch": 0.3771199506629664, "grad_norm": 3.530745267868042, "learning_rate": 7.15993333922524e-06, "loss": 0.8811, "step": 4892 }, { "epoch": 0.37719703977798336, "grad_norm": 3.4644525051116943, 
"learning_rate": 7.158807322313837e-06, "loss": 1.0706, "step": 4893 }, { "epoch": 0.3772741288930003, "grad_norm": 3.4556727409362793, "learning_rate": 7.157681170811979e-06, "loss": 0.9343, "step": 4894 }, { "epoch": 0.37735121800801724, "grad_norm": 3.663114547729492, "learning_rate": 7.156554884789874e-06, "loss": 1.0052, "step": 4895 }, { "epoch": 0.3774283071230342, "grad_norm": 3.9174418449401855, "learning_rate": 7.155428464317741e-06, "loss": 0.9232, "step": 4896 }, { "epoch": 0.3775053962380512, "grad_norm": 3.8079843521118164, "learning_rate": 7.1543019094658074e-06, "loss": 0.9495, "step": 4897 }, { "epoch": 0.37758248535306815, "grad_norm": 3.580113649368286, "learning_rate": 7.153175220304305e-06, "loss": 1.0305, "step": 4898 }, { "epoch": 0.3776595744680851, "grad_norm": 3.540668487548828, "learning_rate": 7.152048396903479e-06, "loss": 0.959, "step": 4899 }, { "epoch": 0.37773666358310204, "grad_norm": 3.402894973754883, "learning_rate": 7.150921439333584e-06, "loss": 0.9978, "step": 4900 }, { "epoch": 0.377813752698119, "grad_norm": 3.550427198410034, "learning_rate": 7.149794347664876e-06, "loss": 1.0077, "step": 4901 }, { "epoch": 0.377890841813136, "grad_norm": 3.6682233810424805, "learning_rate": 7.148667121967625e-06, "loss": 0.9905, "step": 4902 }, { "epoch": 0.37796793092815295, "grad_norm": 3.8685266971588135, "learning_rate": 7.147539762312107e-06, "loss": 1.0975, "step": 4903 }, { "epoch": 0.3780450200431699, "grad_norm": 3.448533773422241, "learning_rate": 7.146412268768605e-06, "loss": 0.857, "step": 4904 }, { "epoch": 0.37812210915818684, "grad_norm": 3.861265182495117, "learning_rate": 7.145284641407418e-06, "loss": 1.0235, "step": 4905 }, { "epoch": 0.3781991982732038, "grad_norm": 3.419316053390503, "learning_rate": 7.144156880298843e-06, "loss": 1.0753, "step": 4906 }, { "epoch": 0.3782762873882208, "grad_norm": 3.76204776763916, "learning_rate": 7.143028985513191e-06, "loss": 0.9979, "step": 4907 }, { "epoch": 0.37835337650323775, "grad_norm": 3.591517686843872, "learning_rate": 7.141900957120781e-06, "loss": 1.0564, "step": 4908 }, { "epoch": 0.3784304656182547, "grad_norm": 3.76042103767395, "learning_rate": 7.140772795191939e-06, "loss": 1.0371, "step": 4909 }, { "epoch": 0.3785075547332717, "grad_norm": 4.168672561645508, "learning_rate": 7.139644499797e-06, "loss": 1.0409, "step": 4910 }, { "epoch": 0.3785846438482886, "grad_norm": 3.331540822982788, "learning_rate": 7.13851607100631e-06, "loss": 0.9319, "step": 4911 }, { "epoch": 0.3786617329633056, "grad_norm": 3.5602786540985107, "learning_rate": 7.137387508890218e-06, "loss": 0.8784, "step": 4912 }, { "epoch": 0.37873882207832255, "grad_norm": 3.6382076740264893, "learning_rate": 7.136258813519085e-06, "loss": 1.0411, "step": 4913 }, { "epoch": 0.3788159111933395, "grad_norm": 3.845329761505127, "learning_rate": 7.135129984963277e-06, "loss": 1.0158, "step": 4914 }, { "epoch": 0.3788930003083565, "grad_norm": 3.546248197555542, "learning_rate": 7.134001023293173e-06, "loss": 1.044, "step": 4915 }, { "epoch": 0.3789700894233734, "grad_norm": 3.4055960178375244, "learning_rate": 7.132871928579159e-06, "loss": 0.9337, "step": 4916 }, { "epoch": 0.3790471785383904, "grad_norm": 3.8326213359832764, "learning_rate": 7.131742700891626e-06, "loss": 1.0974, "step": 4917 }, { "epoch": 0.37912426765340734, "grad_norm": 3.4271841049194336, "learning_rate": 7.130613340300976e-06, "loss": 0.9532, "step": 4918 }, { "epoch": 0.3792013567684243, "grad_norm": 3.5200610160827637, "learning_rate": 
7.1294838468776195e-06, "loss": 1.0648, "step": 4919 }, { "epoch": 0.3792784458834413, "grad_norm": 3.484342098236084, "learning_rate": 7.128354220691973e-06, "loss": 0.8582, "step": 4920 }, { "epoch": 0.3793555349984582, "grad_norm": 3.4747977256774902, "learning_rate": 7.127224461814465e-06, "loss": 0.9661, "step": 4921 }, { "epoch": 0.37943262411347517, "grad_norm": 3.514315366744995, "learning_rate": 7.126094570315527e-06, "loss": 1.0848, "step": 4922 }, { "epoch": 0.37950971322849214, "grad_norm": 3.6075520515441895, "learning_rate": 7.124964546265606e-06, "loss": 1.0381, "step": 4923 }, { "epoch": 0.3795868023435091, "grad_norm": 3.4969682693481445, "learning_rate": 7.1238343897351505e-06, "loss": 0.9393, "step": 4924 }, { "epoch": 0.3796638914585261, "grad_norm": 3.8373172283172607, "learning_rate": 7.12270410079462e-06, "loss": 1.1534, "step": 4925 }, { "epoch": 0.379740980573543, "grad_norm": 4.133366584777832, "learning_rate": 7.121573679514484e-06, "loss": 1.1071, "step": 4926 }, { "epoch": 0.37981806968855997, "grad_norm": 3.4957773685455322, "learning_rate": 7.120443125965215e-06, "loss": 0.9435, "step": 4927 }, { "epoch": 0.37989515880357694, "grad_norm": 3.298091411590576, "learning_rate": 7.1193124402172995e-06, "loss": 0.8858, "step": 4928 }, { "epoch": 0.3799722479185939, "grad_norm": 3.720797061920166, "learning_rate": 7.118181622341232e-06, "loss": 1.0036, "step": 4929 }, { "epoch": 0.3800493370336109, "grad_norm": 3.520009994506836, "learning_rate": 7.117050672407507e-06, "loss": 0.9223, "step": 4930 }, { "epoch": 0.3801264261486278, "grad_norm": 3.883283853530884, "learning_rate": 7.115919590486638e-06, "loss": 0.8866, "step": 4931 }, { "epoch": 0.38020351526364476, "grad_norm": 3.637392520904541, "learning_rate": 7.114788376649143e-06, "loss": 0.9867, "step": 4932 }, { "epoch": 0.38028060437866174, "grad_norm": 3.6200270652770996, "learning_rate": 7.113657030965544e-06, "loss": 0.9339, "step": 4933 }, { "epoch": 0.3803576934936787, "grad_norm": 3.7726197242736816, "learning_rate": 7.1125255535063766e-06, "loss": 0.9902, "step": 4934 }, { "epoch": 0.3804347826086957, "grad_norm": 3.6421115398406982, "learning_rate": 7.111393944342182e-06, "loss": 1.0895, "step": 4935 }, { "epoch": 0.3805118717237126, "grad_norm": 3.6255416870117188, "learning_rate": 7.110262203543509e-06, "loss": 0.9478, "step": 4936 }, { "epoch": 0.38058896083872956, "grad_norm": 3.6377484798431396, "learning_rate": 7.10913033118092e-06, "loss": 1.0047, "step": 4937 }, { "epoch": 0.38066604995374653, "grad_norm": 3.65698504447937, "learning_rate": 7.107998327324975e-06, "loss": 1.0363, "step": 4938 }, { "epoch": 0.3807431390687635, "grad_norm": 3.5486626625061035, "learning_rate": 7.106866192046254e-06, "loss": 1.0382, "step": 4939 }, { "epoch": 0.3808202281837805, "grad_norm": 3.589175224304199, "learning_rate": 7.105733925415336e-06, "loss": 0.9718, "step": 4940 }, { "epoch": 0.3808973172987974, "grad_norm": 3.605515956878662, "learning_rate": 7.104601527502815e-06, "loss": 0.9097, "step": 4941 }, { "epoch": 0.38097440641381436, "grad_norm": 3.868885040283203, "learning_rate": 7.103468998379288e-06, "loss": 1.0441, "step": 4942 }, { "epoch": 0.38105149552883133, "grad_norm": 3.621760129928589, "learning_rate": 7.102336338115363e-06, "loss": 1.0506, "step": 4943 }, { "epoch": 0.3811285846438483, "grad_norm": 4.027126789093018, "learning_rate": 7.101203546781655e-06, "loss": 1.0522, "step": 4944 }, { "epoch": 0.38120567375886527, "grad_norm": 3.5115973949432373, "learning_rate": 
7.1000706244487896e-06, "loss": 0.9613, "step": 4945 }, { "epoch": 0.3812827628738822, "grad_norm": 3.698106050491333, "learning_rate": 7.098937571187397e-06, "loss": 0.9886, "step": 4946 }, { "epoch": 0.38135985198889916, "grad_norm": 3.994877576828003, "learning_rate": 7.097804387068117e-06, "loss": 1.1597, "step": 4947 }, { "epoch": 0.3814369411039161, "grad_norm": 3.984495162963867, "learning_rate": 7.0966710721616e-06, "loss": 0.9572, "step": 4948 }, { "epoch": 0.3815140302189331, "grad_norm": 3.9467110633850098, "learning_rate": 7.095537626538498e-06, "loss": 1.0071, "step": 4949 }, { "epoch": 0.38159111933395007, "grad_norm": 3.824596643447876, "learning_rate": 7.09440405026948e-06, "loss": 1.0244, "step": 4950 }, { "epoch": 0.381668208448967, "grad_norm": 3.812662124633789, "learning_rate": 7.093270343425216e-06, "loss": 1.0646, "step": 4951 }, { "epoch": 0.38174529756398395, "grad_norm": 3.604254961013794, "learning_rate": 7.092136506076387e-06, "loss": 1.0749, "step": 4952 }, { "epoch": 0.3818223866790009, "grad_norm": 3.493272542953491, "learning_rate": 7.091002538293683e-06, "loss": 0.9379, "step": 4953 }, { "epoch": 0.3818994757940179, "grad_norm": 3.4740121364593506, "learning_rate": 7.0898684401478e-06, "loss": 0.9795, "step": 4954 }, { "epoch": 0.38197656490903487, "grad_norm": 4.244378566741943, "learning_rate": 7.088734211709443e-06, "loss": 0.9794, "step": 4955 }, { "epoch": 0.3820536540240518, "grad_norm": 3.6403074264526367, "learning_rate": 7.087599853049327e-06, "loss": 1.0321, "step": 4956 }, { "epoch": 0.38213074313906875, "grad_norm": 3.5275707244873047, "learning_rate": 7.086465364238171e-06, "loss": 0.971, "step": 4957 }, { "epoch": 0.3822078322540857, "grad_norm": 4.26153564453125, "learning_rate": 7.085330745346706e-06, "loss": 1.0188, "step": 4958 }, { "epoch": 0.3822849213691027, "grad_norm": 3.5671985149383545, "learning_rate": 7.08419599644567e-06, "loss": 1.0241, "step": 4959 }, { "epoch": 0.38236201048411966, "grad_norm": 3.462334632873535, "learning_rate": 7.083061117605806e-06, "loss": 0.933, "step": 4960 }, { "epoch": 0.3824390995991366, "grad_norm": 3.856916904449463, "learning_rate": 7.081926108897872e-06, "loss": 0.9347, "step": 4961 }, { "epoch": 0.38251618871415355, "grad_norm": 3.425292730331421, "learning_rate": 7.080790970392626e-06, "loss": 0.9056, "step": 4962 }, { "epoch": 0.3825932778291705, "grad_norm": 3.7738280296325684, "learning_rate": 7.07965570216084e-06, "loss": 0.904, "step": 4963 }, { "epoch": 0.3826703669441875, "grad_norm": 3.893852710723877, "learning_rate": 7.078520304273293e-06, "loss": 0.9253, "step": 4964 }, { "epoch": 0.38274745605920446, "grad_norm": 3.1743626594543457, "learning_rate": 7.077384776800767e-06, "loss": 0.9373, "step": 4965 }, { "epoch": 0.3828245451742214, "grad_norm": 3.271000385284424, "learning_rate": 7.076249119814062e-06, "loss": 0.9536, "step": 4966 }, { "epoch": 0.38290163428923835, "grad_norm": 3.6323864459991455, "learning_rate": 7.075113333383976e-06, "loss": 1.0153, "step": 4967 }, { "epoch": 0.3829787234042553, "grad_norm": 4.131968975067139, "learning_rate": 7.073977417581321e-06, "loss": 1.0542, "step": 4968 }, { "epoch": 0.3830558125192723, "grad_norm": 3.279594898223877, "learning_rate": 7.072841372476918e-06, "loss": 0.9579, "step": 4969 }, { "epoch": 0.38313290163428926, "grad_norm": 3.7253003120422363, "learning_rate": 7.071705198141588e-06, "loss": 0.9364, "step": 4970 }, { "epoch": 0.3832099907493062, "grad_norm": 3.599295139312744, "learning_rate": 7.070568894646171e-06, "loss": 
0.9531, "step": 4971 }, { "epoch": 0.38328707986432314, "grad_norm": 3.4222543239593506, "learning_rate": 7.0694324620615054e-06, "loss": 1.0856, "step": 4972 }, { "epoch": 0.3833641689793401, "grad_norm": 3.6698365211486816, "learning_rate": 7.0682959004584436e-06, "loss": 0.9242, "step": 4973 }, { "epoch": 0.3834412580943571, "grad_norm": 3.4762206077575684, "learning_rate": 7.067159209907845e-06, "loss": 0.933, "step": 4974 }, { "epoch": 0.38351834720937406, "grad_norm": 3.3484156131744385, "learning_rate": 7.066022390480576e-06, "loss": 0.9227, "step": 4975 }, { "epoch": 0.38359543632439097, "grad_norm": 4.061807155609131, "learning_rate": 7.06488544224751e-06, "loss": 1.0448, "step": 4976 }, { "epoch": 0.38367252543940794, "grad_norm": 3.4074602127075195, "learning_rate": 7.0637483652795325e-06, "loss": 0.8229, "step": 4977 }, { "epoch": 0.3837496145544249, "grad_norm": 3.6092684268951416, "learning_rate": 7.062611159647532e-06, "loss": 1.0388, "step": 4978 }, { "epoch": 0.3838267036694419, "grad_norm": 3.7201006412506104, "learning_rate": 7.061473825422408e-06, "loss": 0.9342, "step": 4979 }, { "epoch": 0.38390379278445885, "grad_norm": 3.228909969329834, "learning_rate": 7.060336362675069e-06, "loss": 0.9043, "step": 4980 }, { "epoch": 0.38398088189947577, "grad_norm": 3.3236916065216064, "learning_rate": 7.059198771476426e-06, "loss": 0.9576, "step": 4981 }, { "epoch": 0.38405797101449274, "grad_norm": 3.616847038269043, "learning_rate": 7.0580610518974065e-06, "loss": 1.0305, "step": 4982 }, { "epoch": 0.3841350601295097, "grad_norm": 3.782071590423584, "learning_rate": 7.05692320400894e-06, "loss": 0.9386, "step": 4983 }, { "epoch": 0.3842121492445267, "grad_norm": 3.3913662433624268, "learning_rate": 7.055785227881963e-06, "loss": 0.9801, "step": 4984 }, { "epoch": 0.38428923835954365, "grad_norm": 3.7937633991241455, "learning_rate": 7.054647123587426e-06, "loss": 1.0372, "step": 4985 }, { "epoch": 0.38436632747456057, "grad_norm": 3.505120038986206, "learning_rate": 7.05350889119628e-06, "loss": 0.9769, "step": 4986 }, { "epoch": 0.38444341658957754, "grad_norm": 3.569885015487671, "learning_rate": 7.052370530779491e-06, "loss": 1.138, "step": 4987 }, { "epoch": 0.3845205057045945, "grad_norm": 4.187367916107178, "learning_rate": 7.051232042408028e-06, "loss": 0.9448, "step": 4988 }, { "epoch": 0.3845975948196115, "grad_norm": 3.4536428451538086, "learning_rate": 7.050093426152872e-06, "loss": 0.8804, "step": 4989 }, { "epoch": 0.38467468393462845, "grad_norm": 3.314223289489746, "learning_rate": 7.0489546820850086e-06, "loss": 0.9758, "step": 4990 }, { "epoch": 0.38475177304964536, "grad_norm": 3.603670120239258, "learning_rate": 7.047815810275432e-06, "loss": 0.9632, "step": 4991 }, { "epoch": 0.38482886216466233, "grad_norm": 3.7399637699127197, "learning_rate": 7.046676810795146e-06, "loss": 1.1052, "step": 4992 }, { "epoch": 0.3849059512796793, "grad_norm": 3.740985870361328, "learning_rate": 7.045537683715161e-06, "loss": 1.0086, "step": 4993 }, { "epoch": 0.3849830403946963, "grad_norm": 3.6011998653411865, "learning_rate": 7.044398429106495e-06, "loss": 1.0373, "step": 4994 }, { "epoch": 0.38506012950971324, "grad_norm": 3.508432388305664, "learning_rate": 7.043259047040175e-06, "loss": 1.0279, "step": 4995 }, { "epoch": 0.3851372186247302, "grad_norm": 3.528916358947754, "learning_rate": 7.042119537587237e-06, "loss": 1.0366, "step": 4996 }, { "epoch": 0.38521430773974713, "grad_norm": 3.8693268299102783, "learning_rate": 7.04097990081872e-06, "loss": 0.9748, "step": 
4997 }, { "epoch": 0.3852913968547641, "grad_norm": 3.851828098297119, "learning_rate": 7.039840136805679e-06, "loss": 1.0828, "step": 4998 }, { "epoch": 0.38536848596978107, "grad_norm": 3.7920031547546387, "learning_rate": 7.038700245619169e-06, "loss": 1.1299, "step": 4999 }, { "epoch": 0.38544557508479804, "grad_norm": 4.139936923980713, "learning_rate": 7.037560227330258e-06, "loss": 0.9281, "step": 5000 }, { "epoch": 0.385522664199815, "grad_norm": 3.6004490852355957, "learning_rate": 7.0364200820100194e-06, "loss": 1.0097, "step": 5001 }, { "epoch": 0.38559975331483193, "grad_norm": 3.911167621612549, "learning_rate": 7.035279809729535e-06, "loss": 1.0057, "step": 5002 }, { "epoch": 0.3856768424298489, "grad_norm": 3.6797120571136475, "learning_rate": 7.0341394105598944e-06, "loss": 0.9339, "step": 5003 }, { "epoch": 0.38575393154486587, "grad_norm": 3.733839750289917, "learning_rate": 7.0329988845721996e-06, "loss": 0.9404, "step": 5004 }, { "epoch": 0.38583102065988284, "grad_norm": 3.474809169769287, "learning_rate": 7.0318582318375514e-06, "loss": 1.0068, "step": 5005 }, { "epoch": 0.3859081097748998, "grad_norm": 4.166741371154785, "learning_rate": 7.030717452427066e-06, "loss": 1.0622, "step": 5006 }, { "epoch": 0.3859851988899167, "grad_norm": 3.7879574298858643, "learning_rate": 7.0295765464118645e-06, "loss": 0.9869, "step": 5007 }, { "epoch": 0.3860622880049337, "grad_norm": 3.560980796813965, "learning_rate": 7.028435513863078e-06, "loss": 1.022, "step": 5008 }, { "epoch": 0.38613937711995067, "grad_norm": 3.5441577434539795, "learning_rate": 7.027294354851842e-06, "loss": 1.014, "step": 5009 }, { "epoch": 0.38621646623496764, "grad_norm": 3.795830488204956, "learning_rate": 7.0261530694493015e-06, "loss": 0.8879, "step": 5010 }, { "epoch": 0.3862935553499846, "grad_norm": 3.435690402984619, "learning_rate": 7.025011657726613e-06, "loss": 0.8957, "step": 5011 }, { "epoch": 0.3863706444650015, "grad_norm": 3.815304756164551, "learning_rate": 7.0238701197549345e-06, "loss": 1.0519, "step": 5012 }, { "epoch": 0.3864477335800185, "grad_norm": 3.4580211639404297, "learning_rate": 7.0227284556054355e-06, "loss": 0.9664, "step": 5013 }, { "epoch": 0.38652482269503546, "grad_norm": 3.8483505249023438, "learning_rate": 7.0215866653492936e-06, "loss": 0.9913, "step": 5014 }, { "epoch": 0.38660191181005243, "grad_norm": 3.6694788932800293, "learning_rate": 7.020444749057693e-06, "loss": 0.9433, "step": 5015 }, { "epoch": 0.3866790009250694, "grad_norm": 3.6327717304229736, "learning_rate": 7.019302706801826e-06, "loss": 0.9477, "step": 5016 }, { "epoch": 0.3867560900400863, "grad_norm": 3.4883623123168945, "learning_rate": 7.018160538652895e-06, "loss": 0.9981, "step": 5017 }, { "epoch": 0.3868331791551033, "grad_norm": 3.332284927368164, "learning_rate": 7.017018244682106e-06, "loss": 0.8992, "step": 5018 }, { "epoch": 0.38691026827012026, "grad_norm": 3.850799560546875, "learning_rate": 7.015875824960675e-06, "loss": 1.105, "step": 5019 }, { "epoch": 0.38698735738513723, "grad_norm": 3.9528861045837402, "learning_rate": 7.014733279559829e-06, "loss": 0.9505, "step": 5020 }, { "epoch": 0.3870644465001542, "grad_norm": 3.9368906021118164, "learning_rate": 7.013590608550796e-06, "loss": 0.9322, "step": 5021 }, { "epoch": 0.3871415356151711, "grad_norm": 3.622251272201538, "learning_rate": 7.0124478120048164e-06, "loss": 0.9249, "step": 5022 }, { "epoch": 0.3872186247301881, "grad_norm": 3.4808027744293213, "learning_rate": 7.0113048899931406e-06, "loss": 1.045, "step": 5023 }, { 
"epoch": 0.38729571384520506, "grad_norm": 3.806081771850586, "learning_rate": 7.010161842587019e-06, "loss": 0.9854, "step": 5024 }, { "epoch": 0.38737280296022203, "grad_norm": 3.621647834777832, "learning_rate": 7.009018669857719e-06, "loss": 0.9636, "step": 5025 }, { "epoch": 0.387449892075239, "grad_norm": 3.846794605255127, "learning_rate": 7.0078753718765105e-06, "loss": 0.957, "step": 5026 }, { "epoch": 0.3875269811902559, "grad_norm": 3.383054733276367, "learning_rate": 7.00673194871467e-06, "loss": 0.8777, "step": 5027 }, { "epoch": 0.3876040703052729, "grad_norm": 3.6006200313568115, "learning_rate": 7.005588400443487e-06, "loss": 1.086, "step": 5028 }, { "epoch": 0.38768115942028986, "grad_norm": 3.8396594524383545, "learning_rate": 7.004444727134254e-06, "loss": 1.1418, "step": 5029 }, { "epoch": 0.3877582485353068, "grad_norm": 3.7753446102142334, "learning_rate": 7.003300928858273e-06, "loss": 0.9981, "step": 5030 }, { "epoch": 0.3878353376503238, "grad_norm": 3.5758206844329834, "learning_rate": 7.002157005686855e-06, "loss": 0.9798, "step": 5031 }, { "epoch": 0.3879124267653407, "grad_norm": 4.047423362731934, "learning_rate": 7.001012957691317e-06, "loss": 1.0324, "step": 5032 }, { "epoch": 0.3879895158803577, "grad_norm": 3.57684326171875, "learning_rate": 6.999868784942985e-06, "loss": 0.9592, "step": 5033 }, { "epoch": 0.38806660499537465, "grad_norm": 3.5978035926818848, "learning_rate": 6.998724487513191e-06, "loss": 0.94, "step": 5034 }, { "epoch": 0.3881436941103916, "grad_norm": 3.3599936962127686, "learning_rate": 6.9975800654732785e-06, "loss": 0.9095, "step": 5035 }, { "epoch": 0.3882207832254086, "grad_norm": 3.662285566329956, "learning_rate": 6.996435518894593e-06, "loss": 0.9509, "step": 5036 }, { "epoch": 0.3882978723404255, "grad_norm": 3.5465025901794434, "learning_rate": 6.9952908478484925e-06, "loss": 0.9714, "step": 5037 }, { "epoch": 0.3883749614554425, "grad_norm": 3.2565081119537354, "learning_rate": 6.994146052406343e-06, "loss": 0.9064, "step": 5038 }, { "epoch": 0.38845205057045945, "grad_norm": 3.7436461448669434, "learning_rate": 6.993001132639514e-06, "loss": 0.9848, "step": 5039 }, { "epoch": 0.3885291396854764, "grad_norm": 3.5175907611846924, "learning_rate": 6.991856088619387e-06, "loss": 1.0, "step": 5040 }, { "epoch": 0.3886062288004934, "grad_norm": 3.567593574523926, "learning_rate": 6.99071092041735e-06, "loss": 1.0659, "step": 5041 }, { "epoch": 0.3886833179155103, "grad_norm": 3.355001211166382, "learning_rate": 6.989565628104795e-06, "loss": 0.8182, "step": 5042 }, { "epoch": 0.3887604070305273, "grad_norm": 3.528596878051758, "learning_rate": 6.988420211753129e-06, "loss": 0.9568, "step": 5043 }, { "epoch": 0.38883749614554425, "grad_norm": 3.691378593444824, "learning_rate": 6.987274671433761e-06, "loss": 1.025, "step": 5044 }, { "epoch": 0.3889145852605612, "grad_norm": 3.5705032348632812, "learning_rate": 6.986129007218107e-06, "loss": 0.9854, "step": 5045 }, { "epoch": 0.3889916743755782, "grad_norm": 3.5841269493103027, "learning_rate": 6.984983219177599e-06, "loss": 1.1187, "step": 5046 }, { "epoch": 0.3890687634905951, "grad_norm": 3.42091965675354, "learning_rate": 6.983837307383666e-06, "loss": 0.9753, "step": 5047 }, { "epoch": 0.3891458526056121, "grad_norm": 3.6692841053009033, "learning_rate": 6.982691271907752e-06, "loss": 0.8831, "step": 5048 }, { "epoch": 0.38922294172062905, "grad_norm": 3.5156326293945312, "learning_rate": 6.981545112821306e-06, "loss": 1.0276, "step": 5049 }, { "epoch": 0.389300030835646, 
"grad_norm": 3.318147897720337, "learning_rate": 6.980398830195785e-06, "loss": 0.9131, "step": 5050 }, { "epoch": 0.389377119950663, "grad_norm": 4.0214152336120605, "learning_rate": 6.979252424102654e-06, "loss": 1.0253, "step": 5051 }, { "epoch": 0.3894542090656799, "grad_norm": 3.803903818130493, "learning_rate": 6.978105894613385e-06, "loss": 1.0602, "step": 5052 }, { "epoch": 0.38953129818069687, "grad_norm": 3.736529588699341, "learning_rate": 6.976959241799456e-06, "loss": 0.9655, "step": 5053 }, { "epoch": 0.38960838729571384, "grad_norm": 3.8049023151397705, "learning_rate": 6.97581246573236e-06, "loss": 0.9087, "step": 5054 }, { "epoch": 0.3896854764107308, "grad_norm": 4.01686954498291, "learning_rate": 6.974665566483588e-06, "loss": 1.1021, "step": 5055 }, { "epoch": 0.3897625655257478, "grad_norm": 3.589202642440796, "learning_rate": 6.9735185441246466e-06, "loss": 0.9415, "step": 5056 }, { "epoch": 0.3898396546407647, "grad_norm": 3.462862014770508, "learning_rate": 6.972371398727045e-06, "loss": 0.9772, "step": 5057 }, { "epoch": 0.38991674375578167, "grad_norm": 3.413395404815674, "learning_rate": 6.971224130362301e-06, "loss": 0.9391, "step": 5058 }, { "epoch": 0.38999383287079864, "grad_norm": 3.5816166400909424, "learning_rate": 6.970076739101942e-06, "loss": 0.9687, "step": 5059 }, { "epoch": 0.3900709219858156, "grad_norm": 3.3513641357421875, "learning_rate": 6.968929225017501e-06, "loss": 1.0188, "step": 5060 }, { "epoch": 0.3901480111008326, "grad_norm": 3.450777292251587, "learning_rate": 6.9677815881805215e-06, "loss": 0.9195, "step": 5061 }, { "epoch": 0.3902251002158495, "grad_norm": 3.946836471557617, "learning_rate": 6.96663382866255e-06, "loss": 0.9913, "step": 5062 }, { "epoch": 0.39030218933086647, "grad_norm": 3.5374841690063477, "learning_rate": 6.965485946535145e-06, "loss": 0.945, "step": 5063 }, { "epoch": 0.39037927844588344, "grad_norm": 3.718808174133301, "learning_rate": 6.964337941869872e-06, "loss": 1.0653, "step": 5064 }, { "epoch": 0.3904563675609004, "grad_norm": 3.8617405891418457, "learning_rate": 6.963189814738301e-06, "loss": 1.0212, "step": 5065 }, { "epoch": 0.3905334566759174, "grad_norm": 3.3130338191986084, "learning_rate": 6.962041565212012e-06, "loss": 0.9596, "step": 5066 }, { "epoch": 0.3906105457909343, "grad_norm": 3.5554182529449463, "learning_rate": 6.960893193362594e-06, "loss": 1.0024, "step": 5067 }, { "epoch": 0.39068763490595126, "grad_norm": 4.0424604415893555, "learning_rate": 6.959744699261641e-06, "loss": 0.9991, "step": 5068 }, { "epoch": 0.39076472402096823, "grad_norm": 3.6211678981781006, "learning_rate": 6.9585960829807555e-06, "loss": 1.0449, "step": 5069 }, { "epoch": 0.3908418131359852, "grad_norm": 3.4850916862487793, "learning_rate": 6.9574473445915495e-06, "loss": 0.9836, "step": 5070 }, { "epoch": 0.3909189022510022, "grad_norm": 4.005676746368408, "learning_rate": 6.956298484165638e-06, "loss": 0.8692, "step": 5071 }, { "epoch": 0.3909959913660191, "grad_norm": 3.5592994689941406, "learning_rate": 6.955149501774648e-06, "loss": 1.1276, "step": 5072 }, { "epoch": 0.39107308048103606, "grad_norm": 3.935800313949585, "learning_rate": 6.954000397490213e-06, "loss": 0.992, "step": 5073 }, { "epoch": 0.39115016959605303, "grad_norm": 3.423206090927124, "learning_rate": 6.952851171383972e-06, "loss": 0.923, "step": 5074 }, { "epoch": 0.39122725871107, "grad_norm": 3.449836254119873, "learning_rate": 6.951701823527575e-06, "loss": 0.9982, "step": 5075 }, { "epoch": 0.391304347826087, "grad_norm": 
3.2396328449249268, "learning_rate": 6.950552353992678e-06, "loss": 0.8038, "step": 5076 }, { "epoch": 0.3913814369411039, "grad_norm": 3.798563003540039, "learning_rate": 6.949402762850943e-06, "loss": 1.0334, "step": 5077 }, { "epoch": 0.39145852605612086, "grad_norm": 3.7131567001342773, "learning_rate": 6.948253050174043e-06, "loss": 1.0026, "step": 5078 }, { "epoch": 0.39153561517113783, "grad_norm": 4.187877655029297, "learning_rate": 6.947103216033655e-06, "loss": 1.1898, "step": 5079 }, { "epoch": 0.3916127042861548, "grad_norm": 3.439176082611084, "learning_rate": 6.945953260501466e-06, "loss": 0.9082, "step": 5080 }, { "epoch": 0.39168979340117177, "grad_norm": 3.9063847064971924, "learning_rate": 6.9448031836491705e-06, "loss": 1.0431, "step": 5081 }, { "epoch": 0.39176688251618874, "grad_norm": 3.500028610229492, "learning_rate": 6.943652985548468e-06, "loss": 0.9663, "step": 5082 }, { "epoch": 0.39184397163120566, "grad_norm": 3.206749439239502, "learning_rate": 6.942502666271069e-06, "loss": 0.8262, "step": 5083 }, { "epoch": 0.3919210607462226, "grad_norm": 3.7229039669036865, "learning_rate": 6.9413522258886874e-06, "loss": 0.9906, "step": 5084 }, { "epoch": 0.3919981498612396, "grad_norm": 3.451598882675171, "learning_rate": 6.940201664473051e-06, "loss": 0.9667, "step": 5085 }, { "epoch": 0.39207523897625657, "grad_norm": 4.100324630737305, "learning_rate": 6.939050982095889e-06, "loss": 0.9991, "step": 5086 }, { "epoch": 0.39215232809127354, "grad_norm": 3.7283709049224854, "learning_rate": 6.93790017882894e-06, "loss": 0.9816, "step": 5087 }, { "epoch": 0.39222941720629045, "grad_norm": 3.6680102348327637, "learning_rate": 6.936749254743951e-06, "loss": 0.9915, "step": 5088 }, { "epoch": 0.3923065063213074, "grad_norm": 3.723318576812744, "learning_rate": 6.935598209912679e-06, "loss": 0.9894, "step": 5089 }, { "epoch": 0.3923835954363244, "grad_norm": 3.6859655380249023, "learning_rate": 6.934447044406882e-06, "loss": 1.0608, "step": 5090 }, { "epoch": 0.39246068455134137, "grad_norm": 4.22714900970459, "learning_rate": 6.9332957582983295e-06, "loss": 0.977, "step": 5091 }, { "epoch": 0.39253777366635834, "grad_norm": 3.4322969913482666, "learning_rate": 6.932144351658801e-06, "loss": 1.0241, "step": 5092 }, { "epoch": 0.39261486278137525, "grad_norm": 3.7159926891326904, "learning_rate": 6.930992824560078e-06, "loss": 0.901, "step": 5093 }, { "epoch": 0.3926919518963922, "grad_norm": 3.612903356552124, "learning_rate": 6.9298411770739535e-06, "loss": 0.9303, "step": 5094 }, { "epoch": 0.3927690410114092, "grad_norm": 3.71382999420166, "learning_rate": 6.9286894092722265e-06, "loss": 0.9735, "step": 5095 }, { "epoch": 0.39284613012642616, "grad_norm": 3.824847459793091, "learning_rate": 6.9275375212267035e-06, "loss": 1.0065, "step": 5096 }, { "epoch": 0.39292321924144313, "grad_norm": 3.6681530475616455, "learning_rate": 6.926385513009199e-06, "loss": 1.0493, "step": 5097 }, { "epoch": 0.39300030835646005, "grad_norm": 3.7533106803894043, "learning_rate": 6.925233384691534e-06, "loss": 1.077, "step": 5098 }, { "epoch": 0.393077397471477, "grad_norm": 3.778269052505493, "learning_rate": 6.924081136345541e-06, "loss": 0.9683, "step": 5099 }, { "epoch": 0.393154486586494, "grad_norm": 4.528807163238525, "learning_rate": 6.9229287680430526e-06, "loss": 1.0982, "step": 5100 }, { "epoch": 0.39323157570151096, "grad_norm": 3.8181581497192383, "learning_rate": 6.921776279855914e-06, "loss": 0.8895, "step": 5101 }, { "epoch": 0.39330866481652793, "grad_norm": 
4.004360198974609, "learning_rate": 6.92062367185598e-06, "loss": 0.9677, "step": 5102 }, { "epoch": 0.39338575393154485, "grad_norm": 3.5526719093322754, "learning_rate": 6.919470944115104e-06, "loss": 0.9523, "step": 5103 }, { "epoch": 0.3934628430465618, "grad_norm": 3.5170533657073975, "learning_rate": 6.918318096705157e-06, "loss": 0.9552, "step": 5104 }, { "epoch": 0.3935399321615788, "grad_norm": 3.4880666732788086, "learning_rate": 6.917165129698013e-06, "loss": 0.9412, "step": 5105 }, { "epoch": 0.39361702127659576, "grad_norm": 3.667492628097534, "learning_rate": 6.916012043165552e-06, "loss": 0.889, "step": 5106 }, { "epoch": 0.39369411039161273, "grad_norm": 3.4989163875579834, "learning_rate": 6.9148588371796635e-06, "loss": 1.009, "step": 5107 }, { "epoch": 0.39377119950662964, "grad_norm": 3.465567111968994, "learning_rate": 6.913705511812243e-06, "loss": 0.9644, "step": 5108 }, { "epoch": 0.3938482886216466, "grad_norm": 3.9162404537200928, "learning_rate": 6.912552067135195e-06, "loss": 0.9753, "step": 5109 }, { "epoch": 0.3939253777366636, "grad_norm": 3.494250774383545, "learning_rate": 6.9113985032204325e-06, "loss": 1.0625, "step": 5110 }, { "epoch": 0.39400246685168056, "grad_norm": 3.6587326526641846, "learning_rate": 6.910244820139871e-06, "loss": 0.9012, "step": 5111 }, { "epoch": 0.3940795559666975, "grad_norm": 4.449551105499268, "learning_rate": 6.909091017965439e-06, "loss": 1.0422, "step": 5112 }, { "epoch": 0.39415664508171444, "grad_norm": 3.5880303382873535, "learning_rate": 6.907937096769071e-06, "loss": 1.0439, "step": 5113 }, { "epoch": 0.3942337341967314, "grad_norm": 4.151397228240967, "learning_rate": 6.906783056622703e-06, "loss": 1.1482, "step": 5114 }, { "epoch": 0.3943108233117484, "grad_norm": 3.602203607559204, "learning_rate": 6.9056288975982896e-06, "loss": 0.9506, "step": 5115 }, { "epoch": 0.39438791242676535, "grad_norm": 3.9229793548583984, "learning_rate": 6.904474619767784e-06, "loss": 1.0528, "step": 5116 }, { "epoch": 0.3944650015417823, "grad_norm": 3.542268991470337, "learning_rate": 6.903320223203148e-06, "loss": 1.0154, "step": 5117 }, { "epoch": 0.39454209065679924, "grad_norm": 3.427914619445801, "learning_rate": 6.9021657079763545e-06, "loss": 0.9821, "step": 5118 }, { "epoch": 0.3946191797718162, "grad_norm": 3.257849931716919, "learning_rate": 6.901011074159381e-06, "loss": 1.0341, "step": 5119 }, { "epoch": 0.3946962688868332, "grad_norm": 3.4966633319854736, "learning_rate": 6.899856321824212e-06, "loss": 0.9723, "step": 5120 }, { "epoch": 0.39477335800185015, "grad_norm": 3.4750454425811768, "learning_rate": 6.89870145104284e-06, "loss": 0.9318, "step": 5121 }, { "epoch": 0.3948504471168671, "grad_norm": 4.037153244018555, "learning_rate": 6.897546461887268e-06, "loss": 0.9593, "step": 5122 }, { "epoch": 0.39492753623188404, "grad_norm": 3.800772190093994, "learning_rate": 6.896391354429501e-06, "loss": 1.0182, "step": 5123 }, { "epoch": 0.395004625346901, "grad_norm": 3.4899842739105225, "learning_rate": 6.895236128741554e-06, "loss": 1.0558, "step": 5124 }, { "epoch": 0.395081714461918, "grad_norm": 3.538621187210083, "learning_rate": 6.8940807848954506e-06, "loss": 0.9602, "step": 5125 }, { "epoch": 0.39515880357693495, "grad_norm": 4.251044273376465, "learning_rate": 6.892925322963221e-06, "loss": 1.1921, "step": 5126 }, { "epoch": 0.3952358926919519, "grad_norm": 3.6266791820526123, "learning_rate": 6.8917697430169e-06, "loss": 0.9136, "step": 5127 }, { "epoch": 0.39531298180696883, "grad_norm": 3.5252368450164795, 
"learning_rate": 6.890614045128533e-06, "loss": 0.911, "step": 5128 }, { "epoch": 0.3953900709219858, "grad_norm": 4.031729698181152, "learning_rate": 6.889458229370173e-06, "loss": 1.0345, "step": 5129 }, { "epoch": 0.3954671600370028, "grad_norm": 3.6114397048950195, "learning_rate": 6.888302295813878e-06, "loss": 1.0215, "step": 5130 }, { "epoch": 0.39554424915201974, "grad_norm": 3.7317934036254883, "learning_rate": 6.887146244531715e-06, "loss": 1.0653, "step": 5131 }, { "epoch": 0.3956213382670367, "grad_norm": 3.487471580505371, "learning_rate": 6.885990075595757e-06, "loss": 1.0672, "step": 5132 }, { "epoch": 0.39569842738205363, "grad_norm": 3.7505714893341064, "learning_rate": 6.884833789078084e-06, "loss": 1.0301, "step": 5133 }, { "epoch": 0.3957755164970706, "grad_norm": 3.2812490463256836, "learning_rate": 6.883677385050788e-06, "loss": 0.9663, "step": 5134 }, { "epoch": 0.39585260561208757, "grad_norm": 4.058938026428223, "learning_rate": 6.882520863585962e-06, "loss": 1.0798, "step": 5135 }, { "epoch": 0.39592969472710454, "grad_norm": 3.5618515014648438, "learning_rate": 6.88136422475571e-06, "loss": 0.9328, "step": 5136 }, { "epoch": 0.3960067838421215, "grad_norm": 3.4688501358032227, "learning_rate": 6.880207468632143e-06, "loss": 0.9176, "step": 5137 }, { "epoch": 0.3960838729571384, "grad_norm": 3.658219575881958, "learning_rate": 6.8790505952873775e-06, "loss": 0.9566, "step": 5138 }, { "epoch": 0.3961609620721554, "grad_norm": 3.3856353759765625, "learning_rate": 6.877893604793539e-06, "loss": 1.046, "step": 5139 }, { "epoch": 0.39623805118717237, "grad_norm": 3.460387706756592, "learning_rate": 6.876736497222762e-06, "loss": 0.9436, "step": 5140 }, { "epoch": 0.39631514030218934, "grad_norm": 3.813292980194092, "learning_rate": 6.875579272647182e-06, "loss": 1.0252, "step": 5141 }, { "epoch": 0.3963922294172063, "grad_norm": 3.8571043014526367, "learning_rate": 6.87442193113895e-06, "loss": 1.0681, "step": 5142 }, { "epoch": 0.3964693185322232, "grad_norm": 3.726741075515747, "learning_rate": 6.873264472770217e-06, "loss": 0.9728, "step": 5143 }, { "epoch": 0.3965464076472402, "grad_norm": 3.4998526573181152, "learning_rate": 6.8721068976131466e-06, "loss": 0.966, "step": 5144 }, { "epoch": 0.39662349676225717, "grad_norm": 3.4608278274536133, "learning_rate": 6.870949205739907e-06, "loss": 1.0177, "step": 5145 }, { "epoch": 0.39670058587727414, "grad_norm": 3.6188559532165527, "learning_rate": 6.869791397222674e-06, "loss": 1.0666, "step": 5146 }, { "epoch": 0.3967776749922911, "grad_norm": 3.4421889781951904, "learning_rate": 6.868633472133632e-06, "loss": 0.9928, "step": 5147 }, { "epoch": 0.396854764107308, "grad_norm": 3.740668773651123, "learning_rate": 6.867475430544971e-06, "loss": 1.0269, "step": 5148 }, { "epoch": 0.396931853222325, "grad_norm": 3.9305145740509033, "learning_rate": 6.866317272528889e-06, "loss": 0.9517, "step": 5149 }, { "epoch": 0.39700894233734196, "grad_norm": 3.588923931121826, "learning_rate": 6.865158998157591e-06, "loss": 0.9619, "step": 5150 }, { "epoch": 0.39708603145235893, "grad_norm": 3.9232397079467773, "learning_rate": 6.8640006075032875e-06, "loss": 1.1459, "step": 5151 }, { "epoch": 0.3971631205673759, "grad_norm": 3.4399147033691406, "learning_rate": 6.862842100638201e-06, "loss": 0.8585, "step": 5152 }, { "epoch": 0.3972402096823928, "grad_norm": 3.5570197105407715, "learning_rate": 6.861683477634559e-06, "loss": 1.0854, "step": 5153 }, { "epoch": 0.3973172987974098, "grad_norm": 3.628215789794922, "learning_rate": 
6.860524738564591e-06, "loss": 0.9911, "step": 5154 }, { "epoch": 0.39739438791242676, "grad_norm": 4.154507160186768, "learning_rate": 6.859365883500545e-06, "loss": 1.0277, "step": 5155 }, { "epoch": 0.39747147702744373, "grad_norm": 3.7878050804138184, "learning_rate": 6.858206912514664e-06, "loss": 1.075, "step": 5156 }, { "epoch": 0.3975485661424607, "grad_norm": 3.571855068206787, "learning_rate": 6.857047825679206e-06, "loss": 0.9955, "step": 5157 }, { "epoch": 0.3976256552574776, "grad_norm": 3.5463037490844727, "learning_rate": 6.855888623066434e-06, "loss": 0.969, "step": 5158 }, { "epoch": 0.3977027443724946, "grad_norm": 3.6123862266540527, "learning_rate": 6.854729304748619e-06, "loss": 0.8627, "step": 5159 }, { "epoch": 0.39777983348751156, "grad_norm": 4.434622764587402, "learning_rate": 6.8535698707980356e-06, "loss": 0.9712, "step": 5160 }, { "epoch": 0.39785692260252853, "grad_norm": 3.731267213821411, "learning_rate": 6.852410321286974e-06, "loss": 0.9873, "step": 5161 }, { "epoch": 0.3979340117175455, "grad_norm": 3.5864498615264893, "learning_rate": 6.85125065628772e-06, "loss": 0.9812, "step": 5162 }, { "epoch": 0.3980111008325624, "grad_norm": 3.548417329788208, "learning_rate": 6.850090875872577e-06, "loss": 0.9825, "step": 5163 }, { "epoch": 0.3980881899475794, "grad_norm": 3.9418680667877197, "learning_rate": 6.848930980113848e-06, "loss": 1.1676, "step": 5164 }, { "epoch": 0.39816527906259636, "grad_norm": 3.502095937728882, "learning_rate": 6.8477709690838486e-06, "loss": 0.9376, "step": 5165 }, { "epoch": 0.3982423681776133, "grad_norm": 3.5473270416259766, "learning_rate": 6.846610842854902e-06, "loss": 0.9449, "step": 5166 }, { "epoch": 0.3983194572926303, "grad_norm": 3.6665074825286865, "learning_rate": 6.845450601499331e-06, "loss": 1.0283, "step": 5167 }, { "epoch": 0.39839654640764727, "grad_norm": 3.162196636199951, "learning_rate": 6.844290245089473e-06, "loss": 0.8484, "step": 5168 }, { "epoch": 0.3984736355226642, "grad_norm": 3.5580244064331055, "learning_rate": 6.84312977369767e-06, "loss": 0.958, "step": 5169 }, { "epoch": 0.39855072463768115, "grad_norm": 3.2327682971954346, "learning_rate": 6.841969187396271e-06, "loss": 0.8806, "step": 5170 }, { "epoch": 0.3986278137526981, "grad_norm": 3.59846568107605, "learning_rate": 6.840808486257634e-06, "loss": 0.8916, "step": 5171 }, { "epoch": 0.3987049028677151, "grad_norm": 3.460810899734497, "learning_rate": 6.83964767035412e-06, "loss": 1.0101, "step": 5172 }, { "epoch": 0.39878199198273206, "grad_norm": 3.336289882659912, "learning_rate": 6.838486739758102e-06, "loss": 0.9098, "step": 5173 }, { "epoch": 0.398859081097749, "grad_norm": 3.8699560165405273, "learning_rate": 6.83732569454196e-06, "loss": 1.0069, "step": 5174 }, { "epoch": 0.39893617021276595, "grad_norm": 3.5346524715423584, "learning_rate": 6.836164534778074e-06, "loss": 0.9581, "step": 5175 }, { "epoch": 0.3990132593277829, "grad_norm": 3.468693733215332, "learning_rate": 6.835003260538839e-06, "loss": 0.9279, "step": 5176 }, { "epoch": 0.3990903484427999, "grad_norm": 3.598850727081299, "learning_rate": 6.833841871896656e-06, "loss": 1.0076, "step": 5177 }, { "epoch": 0.39916743755781686, "grad_norm": 3.525563955307007, "learning_rate": 6.83268036892393e-06, "loss": 0.9998, "step": 5178 }, { "epoch": 0.3992445266728338, "grad_norm": 3.8953518867492676, "learning_rate": 6.831518751693073e-06, "loss": 0.9768, "step": 5179 }, { "epoch": 0.39932161578785075, "grad_norm": 3.505366802215576, "learning_rate": 6.830357020276509e-06, 
"loss": 1.007, "step": 5180 }, { "epoch": 0.3993987049028677, "grad_norm": 3.4880192279815674, "learning_rate": 6.829195174746663e-06, "loss": 1.0653, "step": 5181 }, { "epoch": 0.3994757940178847, "grad_norm": 3.4839048385620117, "learning_rate": 6.828033215175974e-06, "loss": 0.9051, "step": 5182 }, { "epoch": 0.39955288313290166, "grad_norm": 3.4201345443725586, "learning_rate": 6.826871141636879e-06, "loss": 0.9144, "step": 5183 }, { "epoch": 0.3996299722479186, "grad_norm": 3.57698917388916, "learning_rate": 6.825708954201832e-06, "loss": 0.9932, "step": 5184 }, { "epoch": 0.39970706136293555, "grad_norm": 3.7728288173675537, "learning_rate": 6.824546652943287e-06, "loss": 1.0721, "step": 5185 }, { "epoch": 0.3997841504779525, "grad_norm": 3.7117831707000732, "learning_rate": 6.823384237933706e-06, "loss": 0.9387, "step": 5186 }, { "epoch": 0.3998612395929695, "grad_norm": 3.299254894256592, "learning_rate": 6.822221709245562e-06, "loss": 0.9593, "step": 5187 }, { "epoch": 0.39993832870798646, "grad_norm": 3.3196065425872803, "learning_rate": 6.8210590669513325e-06, "loss": 0.9527, "step": 5188 }, { "epoch": 0.40001541782300337, "grad_norm": 3.578375816345215, "learning_rate": 6.8198963111235e-06, "loss": 0.9586, "step": 5189 }, { "epoch": 0.40009250693802034, "grad_norm": 3.807652235031128, "learning_rate": 6.8187334418345605e-06, "loss": 0.8759, "step": 5190 }, { "epoch": 0.4001695960530373, "grad_norm": 3.5907301902770996, "learning_rate": 6.8175704591570105e-06, "loss": 1.0146, "step": 5191 }, { "epoch": 0.4002466851680543, "grad_norm": 3.224435806274414, "learning_rate": 6.816407363163354e-06, "loss": 0.9449, "step": 5192 }, { "epoch": 0.40032377428307125, "grad_norm": 3.421980619430542, "learning_rate": 6.815244153926106e-06, "loss": 0.893, "step": 5193 }, { "epoch": 0.40040086339808817, "grad_norm": 3.3092360496520996, "learning_rate": 6.814080831517787e-06, "loss": 0.8947, "step": 5194 }, { "epoch": 0.40047795251310514, "grad_norm": 3.3089075088500977, "learning_rate": 6.812917396010924e-06, "loss": 0.8276, "step": 5195 }, { "epoch": 0.4005550416281221, "grad_norm": 3.9031176567077637, "learning_rate": 6.811753847478051e-06, "loss": 1.0147, "step": 5196 }, { "epoch": 0.4006321307431391, "grad_norm": 3.475313901901245, "learning_rate": 6.810590185991707e-06, "loss": 0.9532, "step": 5197 }, { "epoch": 0.40070921985815605, "grad_norm": 3.7122201919555664, "learning_rate": 6.8094264116244434e-06, "loss": 1.0051, "step": 5198 }, { "epoch": 0.40078630897317297, "grad_norm": 3.6033477783203125, "learning_rate": 6.8082625244488145e-06, "loss": 1.0563, "step": 5199 }, { "epoch": 0.40086339808818994, "grad_norm": 4.018650054931641, "learning_rate": 6.807098524537381e-06, "loss": 0.922, "step": 5200 }, { "epoch": 0.4009404872032069, "grad_norm": 3.791778087615967, "learning_rate": 6.8059344119627155e-06, "loss": 1.0258, "step": 5201 }, { "epoch": 0.4010175763182239, "grad_norm": 3.5043606758117676, "learning_rate": 6.804770186797391e-06, "loss": 1.0035, "step": 5202 }, { "epoch": 0.40109466543324085, "grad_norm": 4.14236307144165, "learning_rate": 6.8036058491139944e-06, "loss": 0.9195, "step": 5203 }, { "epoch": 0.40117175454825776, "grad_norm": 3.829864263534546, "learning_rate": 6.802441398985114e-06, "loss": 0.9637, "step": 5204 }, { "epoch": 0.40124884366327473, "grad_norm": 3.572660446166992, "learning_rate": 6.801276836483346e-06, "loss": 0.8871, "step": 5205 }, { "epoch": 0.4013259327782917, "grad_norm": 3.405879259109497, "learning_rate": 6.800112161681297e-06, "loss": 0.8893, 
"step": 5206 }, { "epoch": 0.4014030218933087, "grad_norm": 3.654069662094116, "learning_rate": 6.798947374651578e-06, "loss": 1.0638, "step": 5207 }, { "epoch": 0.40148011100832565, "grad_norm": 3.986391067504883, "learning_rate": 6.797782475466806e-06, "loss": 1.0039, "step": 5208 }, { "epoch": 0.40155720012334256, "grad_norm": 3.699451446533203, "learning_rate": 6.7966174641996085e-06, "loss": 0.9939, "step": 5209 }, { "epoch": 0.40163428923835953, "grad_norm": 4.058764934539795, "learning_rate": 6.795452340922617e-06, "loss": 0.9998, "step": 5210 }, { "epoch": 0.4017113783533765, "grad_norm": 4.153298854827881, "learning_rate": 6.7942871057084715e-06, "loss": 1.0098, "step": 5211 }, { "epoch": 0.4017884674683935, "grad_norm": 3.8170831203460693, "learning_rate": 6.793121758629817e-06, "loss": 1.0502, "step": 5212 }, { "epoch": 0.40186555658341044, "grad_norm": 4.2434844970703125, "learning_rate": 6.791956299759307e-06, "loss": 0.9763, "step": 5213 }, { "epoch": 0.40194264569842736, "grad_norm": 4.152831077575684, "learning_rate": 6.790790729169604e-06, "loss": 1.0741, "step": 5214 }, { "epoch": 0.40201973481344433, "grad_norm": 3.7323899269104004, "learning_rate": 6.7896250469333725e-06, "loss": 0.9627, "step": 5215 }, { "epoch": 0.4020968239284613, "grad_norm": 4.520125389099121, "learning_rate": 6.78845925312329e-06, "loss": 1.0859, "step": 5216 }, { "epoch": 0.40217391304347827, "grad_norm": 5.602902889251709, "learning_rate": 6.787293347812034e-06, "loss": 1.0761, "step": 5217 }, { "epoch": 0.40225100215849524, "grad_norm": 3.4149789810180664, "learning_rate": 6.7861273310722945e-06, "loss": 0.9928, "step": 5218 }, { "epoch": 0.40232809127351216, "grad_norm": 3.432222366333008, "learning_rate": 6.784961202976768e-06, "loss": 0.9324, "step": 5219 }, { "epoch": 0.4024051803885291, "grad_norm": 3.2903270721435547, "learning_rate": 6.7837949635981524e-06, "loss": 0.9092, "step": 5220 }, { "epoch": 0.4024822695035461, "grad_norm": 3.851072072982788, "learning_rate": 6.782628613009161e-06, "loss": 0.9979, "step": 5221 }, { "epoch": 0.40255935861856307, "grad_norm": 3.850883960723877, "learning_rate": 6.781462151282508e-06, "loss": 0.9482, "step": 5222 }, { "epoch": 0.40263644773358004, "grad_norm": 3.9080920219421387, "learning_rate": 6.7802955784909165e-06, "loss": 0.9543, "step": 5223 }, { "epoch": 0.40271353684859695, "grad_norm": 3.799767255783081, "learning_rate": 6.779128894707116e-06, "loss": 0.9217, "step": 5224 }, { "epoch": 0.4027906259636139, "grad_norm": 3.8918371200561523, "learning_rate": 6.777962100003843e-06, "loss": 1.1152, "step": 5225 }, { "epoch": 0.4028677150786309, "grad_norm": 3.4487974643707275, "learning_rate": 6.776795194453841e-06, "loss": 1.0196, "step": 5226 }, { "epoch": 0.40294480419364787, "grad_norm": 3.964271068572998, "learning_rate": 6.7756281781298615e-06, "loss": 1.0352, "step": 5227 }, { "epoch": 0.40302189330866484, "grad_norm": 3.359325647354126, "learning_rate": 6.7744610511046615e-06, "loss": 0.933, "step": 5228 }, { "epoch": 0.40309898242368175, "grad_norm": 3.6119518280029297, "learning_rate": 6.773293813451004e-06, "loss": 0.9631, "step": 5229 }, { "epoch": 0.4031760715386987, "grad_norm": 3.3389744758605957, "learning_rate": 6.772126465241663e-06, "loss": 0.9816, "step": 5230 }, { "epoch": 0.4032531606537157, "grad_norm": 3.3862054347991943, "learning_rate": 6.7709590065494125e-06, "loss": 0.9248, "step": 5231 }, { "epoch": 0.40333024976873266, "grad_norm": 3.3513505458831787, "learning_rate": 6.769791437447042e-06, "loss": 1.0087, "step": 
5232 }, { "epoch": 0.40340733888374963, "grad_norm": 3.7678074836730957, "learning_rate": 6.76862375800734e-06, "loss": 0.8853, "step": 5233 }, { "epoch": 0.40348442799876655, "grad_norm": 3.6383919715881348, "learning_rate": 6.767455968303107e-06, "loss": 0.8316, "step": 5234 }, { "epoch": 0.4035615171137835, "grad_norm": 3.6129610538482666, "learning_rate": 6.7662880684071495e-06, "loss": 1.0224, "step": 5235 }, { "epoch": 0.4036386062288005, "grad_norm": 3.6119580268859863, "learning_rate": 6.765120058392278e-06, "loss": 1.0199, "step": 5236 }, { "epoch": 0.40371569534381746, "grad_norm": 3.5133492946624756, "learning_rate": 6.763951938331313e-06, "loss": 0.8906, "step": 5237 }, { "epoch": 0.40379278445883443, "grad_norm": 3.35998272895813, "learning_rate": 6.76278370829708e-06, "loss": 0.9159, "step": 5238 }, { "epoch": 0.40386987357385135, "grad_norm": 3.517707586288452, "learning_rate": 6.761615368362412e-06, "loss": 1.0151, "step": 5239 }, { "epoch": 0.4039469626888683, "grad_norm": 3.719403028488159, "learning_rate": 6.760446918600151e-06, "loss": 1.0202, "step": 5240 }, { "epoch": 0.4040240518038853, "grad_norm": 3.614016056060791, "learning_rate": 6.759278359083141e-06, "loss": 1.0067, "step": 5241 }, { "epoch": 0.40410114091890226, "grad_norm": 3.3453943729400635, "learning_rate": 6.758109689884236e-06, "loss": 1.0046, "step": 5242 }, { "epoch": 0.40417823003391923, "grad_norm": 3.9464776515960693, "learning_rate": 6.756940911076299e-06, "loss": 1.0484, "step": 5243 }, { "epoch": 0.40425531914893614, "grad_norm": 3.7068941593170166, "learning_rate": 6.755772022732194e-06, "loss": 0.9906, "step": 5244 }, { "epoch": 0.4043324082639531, "grad_norm": 3.643101215362549, "learning_rate": 6.754603024924799e-06, "loss": 1.0175, "step": 5245 }, { "epoch": 0.4044094973789701, "grad_norm": 3.6078343391418457, "learning_rate": 6.753433917726991e-06, "loss": 1.0381, "step": 5246 }, { "epoch": 0.40448658649398705, "grad_norm": 3.2811944484710693, "learning_rate": 6.75226470121166e-06, "loss": 0.946, "step": 5247 }, { "epoch": 0.404563675609004, "grad_norm": 3.3540406227111816, "learning_rate": 6.751095375451699e-06, "loss": 1.0243, "step": 5248 }, { "epoch": 0.404640764724021, "grad_norm": 3.783407211303711, "learning_rate": 6.749925940520012e-06, "loss": 1.0053, "step": 5249 }, { "epoch": 0.4047178538390379, "grad_norm": 3.748215913772583, "learning_rate": 6.7487563964895066e-06, "loss": 0.9892, "step": 5250 }, { "epoch": 0.4047949429540549, "grad_norm": 4.270641803741455, "learning_rate": 6.747586743433096e-06, "loss": 1.1076, "step": 5251 }, { "epoch": 0.40487203206907185, "grad_norm": 3.66277813911438, "learning_rate": 6.746416981423701e-06, "loss": 0.9607, "step": 5252 }, { "epoch": 0.4049491211840888, "grad_norm": 3.703913927078247, "learning_rate": 6.7452471105342536e-06, "loss": 0.9762, "step": 5253 }, { "epoch": 0.4050262102991058, "grad_norm": 3.490093469619751, "learning_rate": 6.744077130837687e-06, "loss": 0.943, "step": 5254 }, { "epoch": 0.4051032994141227, "grad_norm": 3.721001625061035, "learning_rate": 6.742907042406945e-06, "loss": 1.0145, "step": 5255 }, { "epoch": 0.4051803885291397, "grad_norm": 3.3705310821533203, "learning_rate": 6.741736845314977e-06, "loss": 0.9094, "step": 5256 }, { "epoch": 0.40525747764415665, "grad_norm": 3.7736752033233643, "learning_rate": 6.7405665396347345e-06, "loss": 0.8783, "step": 5257 }, { "epoch": 0.4053345667591736, "grad_norm": 3.2952444553375244, "learning_rate": 6.739396125439184e-06, "loss": 0.9976, "step": 5258 }, { "epoch": 
0.4054116558741906, "grad_norm": 3.4504001140594482, "learning_rate": 6.7382256028012945e-06, "loss": 0.9702, "step": 5259 }, { "epoch": 0.4054887449892075, "grad_norm": 3.792487621307373, "learning_rate": 6.7370549717940405e-06, "loss": 1.0202, "step": 5260 }, { "epoch": 0.4055658341042245, "grad_norm": 3.9348297119140625, "learning_rate": 6.735884232490405e-06, "loss": 1.0302, "step": 5261 }, { "epoch": 0.40564292321924145, "grad_norm": 4.174359321594238, "learning_rate": 6.734713384963379e-06, "loss": 1.0545, "step": 5262 }, { "epoch": 0.4057200123342584, "grad_norm": 3.5805842876434326, "learning_rate": 6.733542429285957e-06, "loss": 1.0303, "step": 5263 }, { "epoch": 0.4057971014492754, "grad_norm": 3.4815800189971924, "learning_rate": 6.732371365531144e-06, "loss": 0.9535, "step": 5264 }, { "epoch": 0.4058741905642923, "grad_norm": 3.540489435195923, "learning_rate": 6.731200193771947e-06, "loss": 0.9182, "step": 5265 }, { "epoch": 0.4059512796793093, "grad_norm": 3.4096593856811523, "learning_rate": 6.730028914081384e-06, "loss": 1.0019, "step": 5266 }, { "epoch": 0.40602836879432624, "grad_norm": 3.462355136871338, "learning_rate": 6.72885752653248e-06, "loss": 0.9894, "step": 5267 }, { "epoch": 0.4061054579093432, "grad_norm": 3.640557289123535, "learning_rate": 6.7276860311982614e-06, "loss": 0.9686, "step": 5268 }, { "epoch": 0.4061825470243602, "grad_norm": 3.729519844055176, "learning_rate": 6.726514428151767e-06, "loss": 1.0958, "step": 5269 }, { "epoch": 0.4062596361393771, "grad_norm": 3.461050033569336, "learning_rate": 6.725342717466041e-06, "loss": 0.961, "step": 5270 }, { "epoch": 0.40633672525439407, "grad_norm": 3.6534311771392822, "learning_rate": 6.724170899214131e-06, "loss": 1.0216, "step": 5271 }, { "epoch": 0.40641381436941104, "grad_norm": 3.3762400150299072, "learning_rate": 6.7229989734690956e-06, "loss": 0.9508, "step": 5272 }, { "epoch": 0.406490903484428, "grad_norm": 3.6674129962921143, "learning_rate": 6.721826940303999e-06, "loss": 0.8763, "step": 5273 }, { "epoch": 0.406567992599445, "grad_norm": 3.514495849609375, "learning_rate": 6.720654799791908e-06, "loss": 0.9273, "step": 5274 }, { "epoch": 0.4066450817144619, "grad_norm": 3.849905490875244, "learning_rate": 6.719482552005903e-06, "loss": 1.0703, "step": 5275 }, { "epoch": 0.40672217082947887, "grad_norm": 3.744799852371216, "learning_rate": 6.718310197019066e-06, "loss": 1.0608, "step": 5276 }, { "epoch": 0.40679925994449584, "grad_norm": 3.5321202278137207, "learning_rate": 6.7171377349044875e-06, "loss": 0.9747, "step": 5277 }, { "epoch": 0.4068763490595128, "grad_norm": 3.6646018028259277, "learning_rate": 6.7159651657352656e-06, "loss": 1.0208, "step": 5278 }, { "epoch": 0.4069534381745298, "grad_norm": 3.462653636932373, "learning_rate": 6.714792489584501e-06, "loss": 0.9575, "step": 5279 }, { "epoch": 0.4070305272895467, "grad_norm": 3.5436105728149414, "learning_rate": 6.7136197065253075e-06, "loss": 0.9758, "step": 5280 }, { "epoch": 0.40710761640456367, "grad_norm": 3.5151219367980957, "learning_rate": 6.7124468166308e-06, "loss": 1.0746, "step": 5281 }, { "epoch": 0.40718470551958064, "grad_norm": 3.4494199752807617, "learning_rate": 6.711273819974101e-06, "loss": 0.9407, "step": 5282 }, { "epoch": 0.4072617946345976, "grad_norm": 3.708519220352173, "learning_rate": 6.710100716628345e-06, "loss": 1.044, "step": 5283 }, { "epoch": 0.4073388837496146, "grad_norm": 3.8825523853302, "learning_rate": 6.708927506666664e-06, "loss": 0.9725, "step": 5284 }, { "epoch": 0.4074159728646315, 
"grad_norm": 3.7221336364746094, "learning_rate": 6.707754190162203e-06, "loss": 0.9684, "step": 5285 }, { "epoch": 0.40749306197964846, "grad_norm": 3.6459972858428955, "learning_rate": 6.7065807671881155e-06, "loss": 1.0946, "step": 5286 }, { "epoch": 0.40757015109466543, "grad_norm": 3.2291955947875977, "learning_rate": 6.705407237817554e-06, "loss": 0.8986, "step": 5287 }, { "epoch": 0.4076472402096824, "grad_norm": 3.8956899642944336, "learning_rate": 6.704233602123685e-06, "loss": 1.0724, "step": 5288 }, { "epoch": 0.4077243293246994, "grad_norm": 3.887136459350586, "learning_rate": 6.703059860179677e-06, "loss": 1.0851, "step": 5289 }, { "epoch": 0.4078014184397163, "grad_norm": 3.6385395526885986, "learning_rate": 6.701886012058706e-06, "loss": 0.9677, "step": 5290 }, { "epoch": 0.40787850755473326, "grad_norm": 3.614091634750366, "learning_rate": 6.700712057833958e-06, "loss": 0.991, "step": 5291 }, { "epoch": 0.40795559666975023, "grad_norm": 3.7074344158172607, "learning_rate": 6.69953799757862e-06, "loss": 0.8541, "step": 5292 }, { "epoch": 0.4080326857847672, "grad_norm": 3.504150152206421, "learning_rate": 6.69836383136589e-06, "loss": 0.942, "step": 5293 }, { "epoch": 0.4081097748997842, "grad_norm": 3.62353777885437, "learning_rate": 6.697189559268973e-06, "loss": 0.9787, "step": 5294 }, { "epoch": 0.4081868640148011, "grad_norm": 3.5454580783843994, "learning_rate": 6.696015181361076e-06, "loss": 1.0769, "step": 5295 }, { "epoch": 0.40826395312981806, "grad_norm": 3.8468873500823975, "learning_rate": 6.694840697715415e-06, "loss": 1.0924, "step": 5296 }, { "epoch": 0.40834104224483503, "grad_norm": 3.6607978343963623, "learning_rate": 6.693666108405215e-06, "loss": 0.9552, "step": 5297 }, { "epoch": 0.408418131359852, "grad_norm": 3.949882984161377, "learning_rate": 6.692491413503704e-06, "loss": 0.9816, "step": 5298 }, { "epoch": 0.40849522047486897, "grad_norm": 3.432330369949341, "learning_rate": 6.691316613084121e-06, "loss": 0.8183, "step": 5299 }, { "epoch": 0.4085723095898859, "grad_norm": 3.6200854778289795, "learning_rate": 6.690141707219706e-06, "loss": 0.992, "step": 5300 }, { "epoch": 0.40864939870490286, "grad_norm": 3.9913625717163086, "learning_rate": 6.688966695983708e-06, "loss": 0.9583, "step": 5301 }, { "epoch": 0.4087264878199198, "grad_norm": 3.4617879390716553, "learning_rate": 6.6877915794493855e-06, "loss": 0.9242, "step": 5302 }, { "epoch": 0.4088035769349368, "grad_norm": 3.69291615486145, "learning_rate": 6.6866163576899985e-06, "loss": 0.9768, "step": 5303 }, { "epoch": 0.40888066604995377, "grad_norm": 3.7546987533569336, "learning_rate": 6.6854410307788175e-06, "loss": 0.9785, "step": 5304 }, { "epoch": 0.4089577551649707, "grad_norm": 3.5481560230255127, "learning_rate": 6.684265598789117e-06, "loss": 1.0147, "step": 5305 }, { "epoch": 0.40903484427998765, "grad_norm": 4.156661510467529, "learning_rate": 6.683090061794179e-06, "loss": 0.9727, "step": 5306 }, { "epoch": 0.4091119333950046, "grad_norm": 3.6532485485076904, "learning_rate": 6.6819144198672925e-06, "loss": 0.9808, "step": 5307 }, { "epoch": 0.4091890225100216, "grad_norm": 3.576277494430542, "learning_rate": 6.6807386730817525e-06, "loss": 0.9245, "step": 5308 }, { "epoch": 0.40926611162503856, "grad_norm": 3.498910903930664, "learning_rate": 6.679562821510862e-06, "loss": 0.9565, "step": 5309 }, { "epoch": 0.4093432007400555, "grad_norm": 3.947864294052124, "learning_rate": 6.6783868652279285e-06, "loss": 1.0307, "step": 5310 }, { "epoch": 0.40942028985507245, "grad_norm": 
3.714496612548828, "learning_rate": 6.677210804306266e-06, "loss": 0.9651, "step": 5311 }, { "epoch": 0.4094973789700894, "grad_norm": 3.6817400455474854, "learning_rate": 6.676034638819197e-06, "loss": 0.932, "step": 5312 }, { "epoch": 0.4095744680851064, "grad_norm": 3.330031394958496, "learning_rate": 6.674858368840047e-06, "loss": 0.964, "step": 5313 }, { "epoch": 0.40965155720012336, "grad_norm": 3.4984524250030518, "learning_rate": 6.673681994442153e-06, "loss": 0.9394, "step": 5314 }, { "epoch": 0.4097286463151403, "grad_norm": 3.6142520904541016, "learning_rate": 6.6725055156988545e-06, "loss": 1.0045, "step": 5315 }, { "epoch": 0.40980573543015725, "grad_norm": 3.7212975025177, "learning_rate": 6.671328932683499e-06, "loss": 1.0474, "step": 5316 }, { "epoch": 0.4098828245451742, "grad_norm": 3.747412919998169, "learning_rate": 6.670152245469441e-06, "loss": 1.0149, "step": 5317 }, { "epoch": 0.4099599136601912, "grad_norm": 3.9034531116485596, "learning_rate": 6.6689754541300426e-06, "loss": 1.1157, "step": 5318 }, { "epoch": 0.41003700277520816, "grad_norm": 4.028858184814453, "learning_rate": 6.667798558738664e-06, "loss": 1.0, "step": 5319 }, { "epoch": 0.4101140918902251, "grad_norm": 3.367152690887451, "learning_rate": 6.666621559368687e-06, "loss": 0.9779, "step": 5320 }, { "epoch": 0.41019118100524204, "grad_norm": 5.969235420227051, "learning_rate": 6.665444456093485e-06, "loss": 1.0506, "step": 5321 }, { "epoch": 0.410268270120259, "grad_norm": 3.4636454582214355, "learning_rate": 6.664267248986447e-06, "loss": 0.941, "step": 5322 }, { "epoch": 0.410345359235276, "grad_norm": 3.7874643802642822, "learning_rate": 6.663089938120967e-06, "loss": 1.089, "step": 5323 }, { "epoch": 0.41042244835029296, "grad_norm": 3.6759419441223145, "learning_rate": 6.6619125235704414e-06, "loss": 0.9959, "step": 5324 }, { "epoch": 0.41049953746530987, "grad_norm": 3.6804399490356445, "learning_rate": 6.660735005408278e-06, "loss": 1.0022, "step": 5325 }, { "epoch": 0.41057662658032684, "grad_norm": 3.5297484397888184, "learning_rate": 6.659557383707887e-06, "loss": 1.0264, "step": 5326 }, { "epoch": 0.4106537156953438, "grad_norm": 3.6392202377319336, "learning_rate": 6.65837965854269e-06, "loss": 0.9898, "step": 5327 }, { "epoch": 0.4107308048103608, "grad_norm": 3.438964366912842, "learning_rate": 6.65720182998611e-06, "loss": 0.9249, "step": 5328 }, { "epoch": 0.41080789392537775, "grad_norm": 3.7228152751922607, "learning_rate": 6.656023898111577e-06, "loss": 1.0178, "step": 5329 }, { "epoch": 0.41088498304039467, "grad_norm": 3.804844856262207, "learning_rate": 6.654845862992532e-06, "loss": 0.8827, "step": 5330 }, { "epoch": 0.41096207215541164, "grad_norm": 3.4772403240203857, "learning_rate": 6.653667724702419e-06, "loss": 1.0009, "step": 5331 }, { "epoch": 0.4110391612704286, "grad_norm": 3.5902342796325684, "learning_rate": 6.652489483314686e-06, "loss": 1.0335, "step": 5332 }, { "epoch": 0.4111162503854456, "grad_norm": 3.510636329650879, "learning_rate": 6.651311138902792e-06, "loss": 1.0807, "step": 5333 }, { "epoch": 0.41119333950046255, "grad_norm": 3.3497116565704346, "learning_rate": 6.650132691540203e-06, "loss": 0.9117, "step": 5334 }, { "epoch": 0.4112704286154795, "grad_norm": 3.712567090988159, "learning_rate": 6.648954141300386e-06, "loss": 0.9818, "step": 5335 }, { "epoch": 0.41134751773049644, "grad_norm": 3.705669641494751, "learning_rate": 6.647775488256818e-06, "loss": 0.957, "step": 5336 }, { "epoch": 0.4114246068455134, "grad_norm": 3.471576452255249, 
"learning_rate": 6.646596732482982e-06, "loss": 0.9049, "step": 5337 }, { "epoch": 0.4115016959605304, "grad_norm": 3.4686636924743652, "learning_rate": 6.645417874052368e-06, "loss": 1.0353, "step": 5338 }, { "epoch": 0.41157878507554735, "grad_norm": 3.8832995891571045, "learning_rate": 6.644238913038472e-06, "loss": 1.0707, "step": 5339 }, { "epoch": 0.4116558741905643, "grad_norm": 3.786958694458008, "learning_rate": 6.643059849514795e-06, "loss": 1.0643, "step": 5340 }, { "epoch": 0.41173296330558123, "grad_norm": 3.698564052581787, "learning_rate": 6.641880683554846e-06, "loss": 1.0087, "step": 5341 }, { "epoch": 0.4118100524205982, "grad_norm": 3.6572458744049072, "learning_rate": 6.640701415232139e-06, "loss": 0.9285, "step": 5342 }, { "epoch": 0.4118871415356152, "grad_norm": 3.6328537464141846, "learning_rate": 6.639522044620197e-06, "loss": 1.0668, "step": 5343 }, { "epoch": 0.41196423065063215, "grad_norm": 3.2839369773864746, "learning_rate": 6.638342571792548e-06, "loss": 0.8407, "step": 5344 }, { "epoch": 0.4120413197656491, "grad_norm": 3.6459763050079346, "learning_rate": 6.637162996822724e-06, "loss": 0.9962, "step": 5345 }, { "epoch": 0.41211840888066603, "grad_norm": 3.8563995361328125, "learning_rate": 6.635983319784265e-06, "loss": 0.9631, "step": 5346 }, { "epoch": 0.412195497995683, "grad_norm": 3.544633388519287, "learning_rate": 6.63480354075072e-06, "loss": 0.8998, "step": 5347 }, { "epoch": 0.4122725871107, "grad_norm": 3.736931085586548, "learning_rate": 6.633623659795642e-06, "loss": 0.985, "step": 5348 }, { "epoch": 0.41234967622571694, "grad_norm": 3.825957775115967, "learning_rate": 6.632443676992588e-06, "loss": 0.9953, "step": 5349 }, { "epoch": 0.4124267653407339, "grad_norm": 3.397263288497925, "learning_rate": 6.631263592415127e-06, "loss": 0.9607, "step": 5350 }, { "epoch": 0.41250385445575083, "grad_norm": 3.639387845993042, "learning_rate": 6.630083406136829e-06, "loss": 0.9568, "step": 5351 }, { "epoch": 0.4125809435707678, "grad_norm": 3.8172554969787598, "learning_rate": 6.628903118231274e-06, "loss": 0.9392, "step": 5352 }, { "epoch": 0.41265803268578477, "grad_norm": 3.8433003425598145, "learning_rate": 6.627722728772044e-06, "loss": 0.987, "step": 5353 }, { "epoch": 0.41273512180080174, "grad_norm": 3.482104778289795, "learning_rate": 6.6265422378327314e-06, "loss": 0.9403, "step": 5354 }, { "epoch": 0.4128122109158187, "grad_norm": 3.8958816528320312, "learning_rate": 6.625361645486936e-06, "loss": 0.9892, "step": 5355 }, { "epoch": 0.4128893000308356, "grad_norm": 3.7703473567962646, "learning_rate": 6.62418095180826e-06, "loss": 0.951, "step": 5356 }, { "epoch": 0.4129663891458526, "grad_norm": 3.737750291824341, "learning_rate": 6.623000156870313e-06, "loss": 0.9753, "step": 5357 }, { "epoch": 0.41304347826086957, "grad_norm": 3.6272356510162354, "learning_rate": 6.621819260746713e-06, "loss": 1.0136, "step": 5358 }, { "epoch": 0.41312056737588654, "grad_norm": 3.797292470932007, "learning_rate": 6.62063826351108e-06, "loss": 1.0478, "step": 5359 }, { "epoch": 0.4131976564909035, "grad_norm": 4.469625949859619, "learning_rate": 6.619457165237046e-06, "loss": 1.0983, "step": 5360 }, { "epoch": 0.4132747456059204, "grad_norm": 3.5453269481658936, "learning_rate": 6.618275965998244e-06, "loss": 1.0379, "step": 5361 }, { "epoch": 0.4133518347209374, "grad_norm": 3.681480646133423, "learning_rate": 6.617094665868317e-06, "loss": 0.9983, "step": 5362 }, { "epoch": 0.41342892383595437, "grad_norm": 3.6391947269439697, "learning_rate": 
6.615913264920912e-06, "loss": 0.9968, "step": 5363 }, { "epoch": 0.41350601295097134, "grad_norm": 3.4914567470550537, "learning_rate": 6.614731763229686e-06, "loss": 0.9064, "step": 5364 }, { "epoch": 0.4135831020659883, "grad_norm": 3.5731704235076904, "learning_rate": 6.613550160868297e-06, "loss": 1.0242, "step": 5365 }, { "epoch": 0.4136601911810052, "grad_norm": 4.226243019104004, "learning_rate": 6.612368457910412e-06, "loss": 1.0554, "step": 5366 }, { "epoch": 0.4137372802960222, "grad_norm": 3.477130889892578, "learning_rate": 6.611186654429704e-06, "loss": 0.8728, "step": 5367 }, { "epoch": 0.41381436941103916, "grad_norm": 3.375094175338745, "learning_rate": 6.6100047504998535e-06, "loss": 0.9865, "step": 5368 }, { "epoch": 0.41389145852605613, "grad_norm": 3.527829647064209, "learning_rate": 6.6088227461945434e-06, "loss": 1.0235, "step": 5369 }, { "epoch": 0.4139685476410731, "grad_norm": 3.82974910736084, "learning_rate": 6.607640641587469e-06, "loss": 0.9777, "step": 5370 }, { "epoch": 0.41404563675609, "grad_norm": 3.5548770427703857, "learning_rate": 6.606458436752327e-06, "loss": 0.965, "step": 5371 }, { "epoch": 0.414122725871107, "grad_norm": 3.665213108062744, "learning_rate": 6.60527613176282e-06, "loss": 0.9712, "step": 5372 }, { "epoch": 0.41419981498612396, "grad_norm": 3.664933443069458, "learning_rate": 6.604093726692661e-06, "loss": 0.91, "step": 5373 }, { "epoch": 0.41427690410114093, "grad_norm": 3.6058082580566406, "learning_rate": 6.602911221615567e-06, "loss": 1.0525, "step": 5374 }, { "epoch": 0.4143539932161579, "grad_norm": 3.68048095703125, "learning_rate": 6.601728616605259e-06, "loss": 0.9375, "step": 5375 }, { "epoch": 0.4144310823311748, "grad_norm": 3.4503397941589355, "learning_rate": 6.600545911735468e-06, "loss": 0.9779, "step": 5376 }, { "epoch": 0.4145081714461918, "grad_norm": 3.573887586593628, "learning_rate": 6.599363107079927e-06, "loss": 0.9744, "step": 5377 }, { "epoch": 0.41458526056120876, "grad_norm": 3.5022692680358887, "learning_rate": 6.59818020271238e-06, "loss": 0.9208, "step": 5378 }, { "epoch": 0.4146623496762257, "grad_norm": 3.4239299297332764, "learning_rate": 6.596997198706576e-06, "loss": 0.9342, "step": 5379 }, { "epoch": 0.4147394387912427, "grad_norm": 4.041162014007568, "learning_rate": 6.595814095136267e-06, "loss": 0.8855, "step": 5380 }, { "epoch": 0.4148165279062596, "grad_norm": 4.2460761070251465, "learning_rate": 6.594630892075213e-06, "loss": 1.0153, "step": 5381 }, { "epoch": 0.4148936170212766, "grad_norm": 3.367917537689209, "learning_rate": 6.593447589597184e-06, "loss": 0.9544, "step": 5382 }, { "epoch": 0.41497070613629355, "grad_norm": 3.5523362159729004, "learning_rate": 6.5922641877759484e-06, "loss": 0.9909, "step": 5383 }, { "epoch": 0.4150477952513105, "grad_norm": 3.4318056106567383, "learning_rate": 6.591080686685289e-06, "loss": 0.9482, "step": 5384 }, { "epoch": 0.4151248843663275, "grad_norm": 3.5694563388824463, "learning_rate": 6.589897086398989e-06, "loss": 0.9606, "step": 5385 }, { "epoch": 0.4152019734813444, "grad_norm": 3.1972849369049072, "learning_rate": 6.588713386990837e-06, "loss": 0.9337, "step": 5386 }, { "epoch": 0.4152790625963614, "grad_norm": 3.958773136138916, "learning_rate": 6.5875295885346356e-06, "loss": 0.8853, "step": 5387 }, { "epoch": 0.41535615171137835, "grad_norm": 4.037240982055664, "learning_rate": 6.5863456911041865e-06, "loss": 1.0532, "step": 5388 }, { "epoch": 0.4154332408263953, "grad_norm": 3.808276891708374, "learning_rate": 6.585161694773301e-06, 
"loss": 1.0308, "step": 5389 }, { "epoch": 0.4155103299414123, "grad_norm": 3.6094319820404053, "learning_rate": 6.583977599615792e-06, "loss": 1.0333, "step": 5390 }, { "epoch": 0.4155874190564292, "grad_norm": 3.879518747329712, "learning_rate": 6.582793405705482e-06, "loss": 1.0241, "step": 5391 }, { "epoch": 0.4156645081714462, "grad_norm": 3.571319818496704, "learning_rate": 6.581609113116203e-06, "loss": 0.9956, "step": 5392 }, { "epoch": 0.41574159728646315, "grad_norm": 3.7715680599212646, "learning_rate": 6.580424721921785e-06, "loss": 0.9709, "step": 5393 }, { "epoch": 0.4158186864014801, "grad_norm": 3.707052230834961, "learning_rate": 6.579240232196073e-06, "loss": 0.9661, "step": 5394 }, { "epoch": 0.4158957755164971, "grad_norm": 3.3030142784118652, "learning_rate": 6.578055644012911e-06, "loss": 0.9728, "step": 5395 }, { "epoch": 0.415972864631514, "grad_norm": 3.5438342094421387, "learning_rate": 6.576870957446151e-06, "loss": 0.9302, "step": 5396 }, { "epoch": 0.416049953746531, "grad_norm": 3.5697009563446045, "learning_rate": 6.575686172569655e-06, "loss": 1.1335, "step": 5397 }, { "epoch": 0.41612704286154795, "grad_norm": 3.6315059661865234, "learning_rate": 6.574501289457287e-06, "loss": 1.1065, "step": 5398 }, { "epoch": 0.4162041319765649, "grad_norm": 3.3344013690948486, "learning_rate": 6.573316308182917e-06, "loss": 0.9045, "step": 5399 }, { "epoch": 0.4162812210915819, "grad_norm": 3.6112499237060547, "learning_rate": 6.5721312288204254e-06, "loss": 1.0306, "step": 5400 }, { "epoch": 0.4163583102065988, "grad_norm": 3.63188099861145, "learning_rate": 6.570946051443693e-06, "loss": 0.9627, "step": 5401 }, { "epoch": 0.4164353993216158, "grad_norm": 3.795747995376587, "learning_rate": 6.5697607761266105e-06, "loss": 1.1105, "step": 5402 }, { "epoch": 0.41651248843663274, "grad_norm": 3.7781665325164795, "learning_rate": 6.568575402943073e-06, "loss": 0.9043, "step": 5403 }, { "epoch": 0.4165895775516497, "grad_norm": 3.3593337535858154, "learning_rate": 6.567389931966983e-06, "loss": 1.0182, "step": 5404 }, { "epoch": 0.4166666666666667, "grad_norm": 4.008146286010742, "learning_rate": 6.566204363272248e-06, "loss": 1.0122, "step": 5405 }, { "epoch": 0.4167437557816836, "grad_norm": 3.6243855953216553, "learning_rate": 6.565018696932786e-06, "loss": 0.9194, "step": 5406 }, { "epoch": 0.41682084489670057, "grad_norm": 3.93573260307312, "learning_rate": 6.5638329330225096e-06, "loss": 0.9881, "step": 5407 }, { "epoch": 0.41689793401171754, "grad_norm": 3.9312803745269775, "learning_rate": 6.5626470716153515e-06, "loss": 0.9584, "step": 5408 }, { "epoch": 0.4169750231267345, "grad_norm": 3.7972381114959717, "learning_rate": 6.561461112785239e-06, "loss": 0.8669, "step": 5409 }, { "epoch": 0.4170521122417515, "grad_norm": 3.8700931072235107, "learning_rate": 6.5602750566061154e-06, "loss": 1.0306, "step": 5410 }, { "epoch": 0.4171292013567684, "grad_norm": 3.5603837966918945, "learning_rate": 6.559088903151923e-06, "loss": 1.0485, "step": 5411 }, { "epoch": 0.41720629047178537, "grad_norm": 3.537443161010742, "learning_rate": 6.5579026524966106e-06, "loss": 1.0481, "step": 5412 }, { "epoch": 0.41728337958680234, "grad_norm": 3.662034511566162, "learning_rate": 6.5567163047141395e-06, "loss": 1.0871, "step": 5413 }, { "epoch": 0.4173604687018193, "grad_norm": 3.207895517349243, "learning_rate": 6.555529859878466e-06, "loss": 0.8474, "step": 5414 }, { "epoch": 0.4174375578168363, "grad_norm": 3.5553321838378906, "learning_rate": 6.554343318063563e-06, "loss": 1.0164, 
"step": 5415 }, { "epoch": 0.4175146469318532, "grad_norm": 4.284764766693115, "learning_rate": 6.553156679343404e-06, "loss": 0.9951, "step": 5416 }, { "epoch": 0.41759173604687017, "grad_norm": 3.8002851009368896, "learning_rate": 6.551969943791972e-06, "loss": 1.0258, "step": 5417 }, { "epoch": 0.41766882516188714, "grad_norm": 4.005855560302734, "learning_rate": 6.550783111483249e-06, "loss": 0.9425, "step": 5418 }, { "epoch": 0.4177459142769041, "grad_norm": 3.5410430431365967, "learning_rate": 6.549596182491233e-06, "loss": 0.9774, "step": 5419 }, { "epoch": 0.4178230033919211, "grad_norm": 3.704803943634033, "learning_rate": 6.548409156889919e-06, "loss": 0.9802, "step": 5420 }, { "epoch": 0.41790009250693805, "grad_norm": 3.7099010944366455, "learning_rate": 6.5472220347533145e-06, "loss": 0.9581, "step": 5421 }, { "epoch": 0.41797718162195496, "grad_norm": 3.5973217487335205, "learning_rate": 6.546034816155429e-06, "loss": 0.9321, "step": 5422 }, { "epoch": 0.41805427073697193, "grad_norm": 4.097982406616211, "learning_rate": 6.54484750117028e-06, "loss": 1.0699, "step": 5423 }, { "epoch": 0.4181313598519889, "grad_norm": 3.9153554439544678, "learning_rate": 6.543660089871891e-06, "loss": 0.9045, "step": 5424 }, { "epoch": 0.4182084489670059, "grad_norm": 3.366682767868042, "learning_rate": 6.5424725823342895e-06, "loss": 1.0124, "step": 5425 }, { "epoch": 0.41828553808202285, "grad_norm": 3.677605390548706, "learning_rate": 6.54128497863151e-06, "loss": 1.0661, "step": 5426 }, { "epoch": 0.41836262719703976, "grad_norm": 3.49324369430542, "learning_rate": 6.540097278837596e-06, "loss": 0.9011, "step": 5427 }, { "epoch": 0.41843971631205673, "grad_norm": 3.3589603900909424, "learning_rate": 6.538909483026593e-06, "loss": 0.9454, "step": 5428 }, { "epoch": 0.4185168054270737, "grad_norm": 3.4064254760742188, "learning_rate": 6.537721591272553e-06, "loss": 0.9948, "step": 5429 }, { "epoch": 0.41859389454209067, "grad_norm": 3.785304546356201, "learning_rate": 6.536533603649536e-06, "loss": 0.9664, "step": 5430 }, { "epoch": 0.41867098365710764, "grad_norm": 3.5690832138061523, "learning_rate": 6.5353455202316075e-06, "loss": 0.9863, "step": 5431 }, { "epoch": 0.41874807277212456, "grad_norm": 3.667205572128296, "learning_rate": 6.5341573410928376e-06, "loss": 1.0341, "step": 5432 }, { "epoch": 0.41882516188714153, "grad_norm": 3.1317427158355713, "learning_rate": 6.532969066307302e-06, "loss": 0.9154, "step": 5433 }, { "epoch": 0.4189022510021585, "grad_norm": 3.749356746673584, "learning_rate": 6.531780695949086e-06, "loss": 0.9678, "step": 5434 }, { "epoch": 0.41897934011717547, "grad_norm": 3.7041513919830322, "learning_rate": 6.530592230092276e-06, "loss": 1.0266, "step": 5435 }, { "epoch": 0.41905642923219244, "grad_norm": 3.199434518814087, "learning_rate": 6.529403668810968e-06, "loss": 0.964, "step": 5436 }, { "epoch": 0.41913351834720936, "grad_norm": 3.5290520191192627, "learning_rate": 6.528215012179262e-06, "loss": 0.9869, "step": 5437 }, { "epoch": 0.4192106074622263, "grad_norm": 3.9519081115722656, "learning_rate": 6.527026260271265e-06, "loss": 1.0839, "step": 5438 }, { "epoch": 0.4192876965772433, "grad_norm": 3.520599603652954, "learning_rate": 6.525837413161089e-06, "loss": 1.0857, "step": 5439 }, { "epoch": 0.41936478569226027, "grad_norm": 3.515936851501465, "learning_rate": 6.524648470922854e-06, "loss": 1.067, "step": 5440 }, { "epoch": 0.41944187480727724, "grad_norm": 3.486137628555298, "learning_rate": 6.523459433630681e-06, "loss": 0.9586, "step": 5441 }, 
{ "epoch": 0.41951896392229415, "grad_norm": 3.7735965251922607, "learning_rate": 6.522270301358704e-06, "loss": 1.0085, "step": 5442 }, { "epoch": 0.4195960530373111, "grad_norm": 3.485811948776245, "learning_rate": 6.521081074181058e-06, "loss": 0.9666, "step": 5443 }, { "epoch": 0.4196731421523281, "grad_norm": 3.6323955059051514, "learning_rate": 6.519891752171884e-06, "loss": 1.0444, "step": 5444 }, { "epoch": 0.41975023126734506, "grad_norm": 3.7233123779296875, "learning_rate": 6.518702335405331e-06, "loss": 0.9007, "step": 5445 }, { "epoch": 0.41982732038236203, "grad_norm": 3.387659788131714, "learning_rate": 6.517512823955554e-06, "loss": 0.9503, "step": 5446 }, { "epoch": 0.41990440949737895, "grad_norm": 3.6936397552490234, "learning_rate": 6.516323217896712e-06, "loss": 1.012, "step": 5447 }, { "epoch": 0.4199814986123959, "grad_norm": 3.6477158069610596, "learning_rate": 6.515133517302969e-06, "loss": 1.0494, "step": 5448 }, { "epoch": 0.4200585877274129, "grad_norm": 3.4203903675079346, "learning_rate": 6.513943722248499e-06, "loss": 0.8738, "step": 5449 }, { "epoch": 0.42013567684242986, "grad_norm": 3.43131160736084, "learning_rate": 6.512753832807479e-06, "loss": 0.8789, "step": 5450 }, { "epoch": 0.42021276595744683, "grad_norm": 3.5120580196380615, "learning_rate": 6.5115638490540925e-06, "loss": 0.9803, "step": 5451 }, { "epoch": 0.42028985507246375, "grad_norm": 3.5579140186309814, "learning_rate": 6.510373771062527e-06, "loss": 0.893, "step": 5452 }, { "epoch": 0.4203669441874807, "grad_norm": 4.425456523895264, "learning_rate": 6.50918359890698e-06, "loss": 0.9491, "step": 5453 }, { "epoch": 0.4204440333024977, "grad_norm": 3.6898512840270996, "learning_rate": 6.507993332661653e-06, "loss": 0.9606, "step": 5454 }, { "epoch": 0.42052112241751466, "grad_norm": 3.4359302520751953, "learning_rate": 6.506802972400751e-06, "loss": 0.9471, "step": 5455 }, { "epoch": 0.42059821153253163, "grad_norm": 3.5586142539978027, "learning_rate": 6.5056125181984885e-06, "loss": 0.958, "step": 5456 }, { "epoch": 0.42067530064754854, "grad_norm": 3.591996192932129, "learning_rate": 6.5044219701290825e-06, "loss": 1.0096, "step": 5457 }, { "epoch": 0.4207523897625655, "grad_norm": 3.7730772495269775, "learning_rate": 6.503231328266758e-06, "loss": 0.9814, "step": 5458 }, { "epoch": 0.4208294788775825, "grad_norm": 3.378232717514038, "learning_rate": 6.5020405926857465e-06, "loss": 0.915, "step": 5459 }, { "epoch": 0.42090656799259946, "grad_norm": 3.841001033782959, "learning_rate": 6.500849763460283e-06, "loss": 1.0572, "step": 5460 }, { "epoch": 0.4209836571076164, "grad_norm": 3.5528838634490967, "learning_rate": 6.4996588406646096e-06, "loss": 1.0324, "step": 5461 }, { "epoch": 0.42106074622263334, "grad_norm": 3.5531325340270996, "learning_rate": 6.498467824372974e-06, "loss": 0.9234, "step": 5462 }, { "epoch": 0.4211378353376503, "grad_norm": 3.503661870956421, "learning_rate": 6.497276714659631e-06, "loss": 0.9227, "step": 5463 }, { "epoch": 0.4212149244526673, "grad_norm": 3.5585978031158447, "learning_rate": 6.49608551159884e-06, "loss": 0.9453, "step": 5464 }, { "epoch": 0.42129201356768425, "grad_norm": 3.890265941619873, "learning_rate": 6.494894215264864e-06, "loss": 0.9202, "step": 5465 }, { "epoch": 0.4213691026827012, "grad_norm": 3.9904141426086426, "learning_rate": 6.493702825731977e-06, "loss": 0.9892, "step": 5466 }, { "epoch": 0.42144619179771814, "grad_norm": 3.911604881286621, "learning_rate": 6.492511343074456e-06, "loss": 1.007, "step": 5467 }, { "epoch": 
0.4215232809127351, "grad_norm": 3.6176915168762207, "learning_rate": 6.491319767366581e-06, "loss": 1.0362, "step": 5468 }, { "epoch": 0.4216003700277521, "grad_norm": 3.815973997116089, "learning_rate": 6.490128098682642e-06, "loss": 0.9862, "step": 5469 }, { "epoch": 0.42167745914276905, "grad_norm": 4.3016037940979, "learning_rate": 6.488936337096935e-06, "loss": 0.9584, "step": 5470 }, { "epoch": 0.421754548257786, "grad_norm": 3.49375319480896, "learning_rate": 6.487744482683758e-06, "loss": 0.9048, "step": 5471 }, { "epoch": 0.42183163737280294, "grad_norm": 4.007232189178467, "learning_rate": 6.486552535517419e-06, "loss": 1.0449, "step": 5472 }, { "epoch": 0.4219087264878199, "grad_norm": 3.4516263008117676, "learning_rate": 6.485360495672226e-06, "loss": 0.9911, "step": 5473 }, { "epoch": 0.4219858156028369, "grad_norm": 3.6937084197998047, "learning_rate": 6.4841683632225005e-06, "loss": 0.9007, "step": 5474 }, { "epoch": 0.42206290471785385, "grad_norm": 4.0262627601623535, "learning_rate": 6.482976138242564e-06, "loss": 1.1981, "step": 5475 }, { "epoch": 0.4221399938328708, "grad_norm": 3.6597800254821777, "learning_rate": 6.481783820806745e-06, "loss": 0.898, "step": 5476 }, { "epoch": 0.42221708294788773, "grad_norm": 3.7206947803497314, "learning_rate": 6.4805914109893805e-06, "loss": 0.9356, "step": 5477 }, { "epoch": 0.4222941720629047, "grad_norm": 3.3729729652404785, "learning_rate": 6.479398908864809e-06, "loss": 0.9129, "step": 5478 }, { "epoch": 0.4223712611779217, "grad_norm": 3.6832430362701416, "learning_rate": 6.478206314507378e-06, "loss": 0.9142, "step": 5479 }, { "epoch": 0.42244835029293865, "grad_norm": 3.764679431915283, "learning_rate": 6.47701362799144e-06, "loss": 1.0789, "step": 5480 }, { "epoch": 0.4225254394079556, "grad_norm": 3.5273795127868652, "learning_rate": 6.47582084939135e-06, "loss": 0.9582, "step": 5481 }, { "epoch": 0.42260252852297253, "grad_norm": 3.7401888370513916, "learning_rate": 6.474627978781474e-06, "loss": 1.0972, "step": 5482 }, { "epoch": 0.4226796176379895, "grad_norm": 3.4443490505218506, "learning_rate": 6.473435016236181e-06, "loss": 0.9941, "step": 5483 }, { "epoch": 0.4227567067530065, "grad_norm": 3.8706183433532715, "learning_rate": 6.472241961829846e-06, "loss": 0.9752, "step": 5484 }, { "epoch": 0.42283379586802344, "grad_norm": 3.611464023590088, "learning_rate": 6.47104881563685e-06, "loss": 0.9907, "step": 5485 }, { "epoch": 0.4229108849830404, "grad_norm": 3.5316667556762695, "learning_rate": 6.469855577731579e-06, "loss": 0.8496, "step": 5486 }, { "epoch": 0.42298797409805733, "grad_norm": 3.4962635040283203, "learning_rate": 6.468662248188424e-06, "loss": 1.0317, "step": 5487 }, { "epoch": 0.4230650632130743, "grad_norm": 4.126564025878906, "learning_rate": 6.467468827081786e-06, "loss": 0.9859, "step": 5488 }, { "epoch": 0.42314215232809127, "grad_norm": 3.768683671951294, "learning_rate": 6.466275314486066e-06, "loss": 0.8975, "step": 5489 }, { "epoch": 0.42321924144310824, "grad_norm": 3.997591733932495, "learning_rate": 6.4650817104756735e-06, "loss": 0.9813, "step": 5490 }, { "epoch": 0.4232963305581252, "grad_norm": 3.633054494857788, "learning_rate": 6.463888015125026e-06, "loss": 0.8793, "step": 5491 }, { "epoch": 0.4233734196731421, "grad_norm": 3.5406949520111084, "learning_rate": 6.4626942285085414e-06, "loss": 0.8675, "step": 5492 }, { "epoch": 0.4234505087881591, "grad_norm": 3.7187070846557617, "learning_rate": 6.461500350700648e-06, "loss": 1.0153, "step": 5493 }, { "epoch": 0.42352759790317607, 
"grad_norm": 3.6261672973632812, "learning_rate": 6.4603063817757746e-06, "loss": 0.9925, "step": 5494 }, { "epoch": 0.42360468701819304, "grad_norm": 3.54945969581604, "learning_rate": 6.459112321808363e-06, "loss": 1.0348, "step": 5495 }, { "epoch": 0.42368177613321, "grad_norm": 3.686965227127075, "learning_rate": 6.457918170872855e-06, "loss": 0.9913, "step": 5496 }, { "epoch": 0.4237588652482269, "grad_norm": 3.7915258407592773, "learning_rate": 6.4567239290437e-06, "loss": 1.025, "step": 5497 }, { "epoch": 0.4238359543632439, "grad_norm": 3.7425386905670166, "learning_rate": 6.455529596395353e-06, "loss": 0.9447, "step": 5498 }, { "epoch": 0.42391304347826086, "grad_norm": 3.5631515979766846, "learning_rate": 6.454335173002273e-06, "loss": 0.9225, "step": 5499 }, { "epoch": 0.42399013259327784, "grad_norm": 3.4709506034851074, "learning_rate": 6.4531406589389275e-06, "loss": 0.9846, "step": 5500 }, { "epoch": 0.4240672217082948, "grad_norm": 3.696099281311035, "learning_rate": 6.451946054279788e-06, "loss": 0.9241, "step": 5501 }, { "epoch": 0.4241443108233117, "grad_norm": 3.3619208335876465, "learning_rate": 6.450751359099332e-06, "loss": 0.9098, "step": 5502 }, { "epoch": 0.4242213999383287, "grad_norm": 3.8258392810821533, "learning_rate": 6.449556573472042e-06, "loss": 1.1477, "step": 5503 }, { "epoch": 0.42429848905334566, "grad_norm": 3.742774724960327, "learning_rate": 6.448361697472408e-06, "loss": 1.0046, "step": 5504 }, { "epoch": 0.42437557816836263, "grad_norm": 3.6903765201568604, "learning_rate": 6.447166731174923e-06, "loss": 0.9383, "step": 5505 }, { "epoch": 0.4244526672833796, "grad_norm": 4.07509183883667, "learning_rate": 6.445971674654087e-06, "loss": 1.0482, "step": 5506 }, { "epoch": 0.4245297563983966, "grad_norm": 3.969420909881592, "learning_rate": 6.444776527984406e-06, "loss": 0.9808, "step": 5507 }, { "epoch": 0.4246068455134135, "grad_norm": 3.540527820587158, "learning_rate": 6.443581291240392e-06, "loss": 0.9194, "step": 5508 }, { "epoch": 0.42468393462843046, "grad_norm": 3.800278663635254, "learning_rate": 6.44238596449656e-06, "loss": 0.9855, "step": 5509 }, { "epoch": 0.42476102374344743, "grad_norm": 3.841402769088745, "learning_rate": 6.441190547827434e-06, "loss": 1.0468, "step": 5510 }, { "epoch": 0.4248381128584644, "grad_norm": 3.673065423965454, "learning_rate": 6.439995041307541e-06, "loss": 1.0121, "step": 5511 }, { "epoch": 0.42491520197348137, "grad_norm": 3.586517810821533, "learning_rate": 6.438799445011415e-06, "loss": 1.0284, "step": 5512 }, { "epoch": 0.4249922910884983, "grad_norm": 4.116529941558838, "learning_rate": 6.4376037590135955e-06, "loss": 1.06, "step": 5513 }, { "epoch": 0.42506938020351526, "grad_norm": 3.565775156021118, "learning_rate": 6.436407983388627e-06, "loss": 0.9655, "step": 5514 }, { "epoch": 0.4251464693185322, "grad_norm": 3.403215169906616, "learning_rate": 6.435212118211062e-06, "loss": 0.9076, "step": 5515 }, { "epoch": 0.4252235584335492, "grad_norm": 4.086001396179199, "learning_rate": 6.434016163555452e-06, "loss": 1.0058, "step": 5516 }, { "epoch": 0.42530064754856617, "grad_norm": 3.650869131088257, "learning_rate": 6.432820119496363e-06, "loss": 0.941, "step": 5517 }, { "epoch": 0.4253777366635831, "grad_norm": 3.593628168106079, "learning_rate": 6.431623986108359e-06, "loss": 1.0367, "step": 5518 }, { "epoch": 0.42545482577860005, "grad_norm": 3.7632596492767334, "learning_rate": 6.430427763466014e-06, "loss": 0.9751, "step": 5519 }, { "epoch": 0.425531914893617, "grad_norm": 3.490562677383423, 
"learning_rate": 6.429231451643907e-06, "loss": 0.9234, "step": 5520 }, { "epoch": 0.425609004008634, "grad_norm": 3.740706443786621, "learning_rate": 6.428035050716621e-06, "loss": 1.023, "step": 5521 }, { "epoch": 0.42568609312365097, "grad_norm": 3.898951292037964, "learning_rate": 6.426838560758746e-06, "loss": 0.9387, "step": 5522 }, { "epoch": 0.4257631822386679, "grad_norm": 3.8880972862243652, "learning_rate": 6.425641981844876e-06, "loss": 0.8878, "step": 5523 }, { "epoch": 0.42584027135368485, "grad_norm": 3.443209648132324, "learning_rate": 6.424445314049611e-06, "loss": 0.8749, "step": 5524 }, { "epoch": 0.4259173604687018, "grad_norm": 4.383835315704346, "learning_rate": 6.42324855744756e-06, "loss": 1.0518, "step": 5525 }, { "epoch": 0.4259944495837188, "grad_norm": 3.5138866901397705, "learning_rate": 6.422051712113332e-06, "loss": 1.003, "step": 5526 }, { "epoch": 0.42607153869873576, "grad_norm": 3.474919080734253, "learning_rate": 6.420854778121543e-06, "loss": 1.0002, "step": 5527 }, { "epoch": 0.4261486278137527, "grad_norm": 3.776796817779541, "learning_rate": 6.41965775554682e-06, "loss": 1.0651, "step": 5528 }, { "epoch": 0.42622571692876965, "grad_norm": 3.249838352203369, "learning_rate": 6.418460644463787e-06, "loss": 0.9011, "step": 5529 }, { "epoch": 0.4263028060437866, "grad_norm": 3.8011159896850586, "learning_rate": 6.41726344494708e-06, "loss": 0.8613, "step": 5530 }, { "epoch": 0.4263798951588036, "grad_norm": 4.086413383483887, "learning_rate": 6.416066157071338e-06, "loss": 1.0941, "step": 5531 }, { "epoch": 0.42645698427382056, "grad_norm": 3.662161111831665, "learning_rate": 6.414868780911203e-06, "loss": 1.0212, "step": 5532 }, { "epoch": 0.4265340733888375, "grad_norm": 3.3614611625671387, "learning_rate": 6.41367131654133e-06, "loss": 0.925, "step": 5533 }, { "epoch": 0.42661116250385445, "grad_norm": 3.57147216796875, "learning_rate": 6.41247376403637e-06, "loss": 0.9396, "step": 5534 }, { "epoch": 0.4266882516188714, "grad_norm": 3.516568422317505, "learning_rate": 6.4112761234709866e-06, "loss": 0.8563, "step": 5535 }, { "epoch": 0.4267653407338884, "grad_norm": 3.589931011199951, "learning_rate": 6.4100783949198465e-06, "loss": 0.9974, "step": 5536 }, { "epoch": 0.42684242984890536, "grad_norm": 3.5021722316741943, "learning_rate": 6.408880578457622e-06, "loss": 1.0911, "step": 5537 }, { "epoch": 0.4269195189639223, "grad_norm": 3.5328221321105957, "learning_rate": 6.407682674158988e-06, "loss": 0.8483, "step": 5538 }, { "epoch": 0.42699660807893924, "grad_norm": 4.008224010467529, "learning_rate": 6.406484682098632e-06, "loss": 1.0525, "step": 5539 }, { "epoch": 0.4270736971939562, "grad_norm": 3.6036767959594727, "learning_rate": 6.40528660235124e-06, "loss": 0.9958, "step": 5540 }, { "epoch": 0.4271507863089732, "grad_norm": 4.159304618835449, "learning_rate": 6.404088434991504e-06, "loss": 0.9268, "step": 5541 }, { "epoch": 0.42722787542399016, "grad_norm": 4.040691375732422, "learning_rate": 6.402890180094129e-06, "loss": 1.1215, "step": 5542 }, { "epoch": 0.42730496453900707, "grad_norm": 3.66817307472229, "learning_rate": 6.401691837733815e-06, "loss": 0.9277, "step": 5543 }, { "epoch": 0.42738205365402404, "grad_norm": 4.073860168457031, "learning_rate": 6.4004934079852775e-06, "loss": 0.986, "step": 5544 }, { "epoch": 0.427459142769041, "grad_norm": 3.9062860012054443, "learning_rate": 6.399294890923227e-06, "loss": 1.0575, "step": 5545 }, { "epoch": 0.427536231884058, "grad_norm": 3.5801217555999756, "learning_rate": 
6.398096286622388e-06, "loss": 0.9702, "step": 5546 }, { "epoch": 0.42761332099907495, "grad_norm": 3.972623825073242, "learning_rate": 6.396897595157487e-06, "loss": 0.9699, "step": 5547 }, { "epoch": 0.42769041011409187, "grad_norm": 3.516526222229004, "learning_rate": 6.395698816603253e-06, "loss": 0.9262, "step": 5548 }, { "epoch": 0.42776749922910884, "grad_norm": 3.5756266117095947, "learning_rate": 6.39449995103443e-06, "loss": 0.9331, "step": 5549 }, { "epoch": 0.4278445883441258, "grad_norm": 3.659475326538086, "learning_rate": 6.393300998525754e-06, "loss": 0.8752, "step": 5550 }, { "epoch": 0.4279216774591428, "grad_norm": 4.002374649047852, "learning_rate": 6.392101959151978e-06, "loss": 1.1071, "step": 5551 }, { "epoch": 0.42799876657415975, "grad_norm": 3.4947450160980225, "learning_rate": 6.390902832987857e-06, "loss": 0.9502, "step": 5552 }, { "epoch": 0.42807585568917667, "grad_norm": 3.6659510135650635, "learning_rate": 6.389703620108145e-06, "loss": 0.9983, "step": 5553 }, { "epoch": 0.42815294480419364, "grad_norm": 3.892176628112793, "learning_rate": 6.388504320587611e-06, "loss": 0.9481, "step": 5554 }, { "epoch": 0.4282300339192106, "grad_norm": 3.580537796020508, "learning_rate": 6.387304934501024e-06, "loss": 0.9526, "step": 5555 }, { "epoch": 0.4283071230342276, "grad_norm": 3.420477867126465, "learning_rate": 6.386105461923159e-06, "loss": 0.9092, "step": 5556 }, { "epoch": 0.42838421214924455, "grad_norm": 3.7150497436523438, "learning_rate": 6.384905902928797e-06, "loss": 1.0413, "step": 5557 }, { "epoch": 0.42846130126426146, "grad_norm": 3.4000418186187744, "learning_rate": 6.383706257592725e-06, "loss": 0.9897, "step": 5558 }, { "epoch": 0.42853839037927843, "grad_norm": 3.336329698562622, "learning_rate": 6.382506525989734e-06, "loss": 1.0003, "step": 5559 }, { "epoch": 0.4286154794942954, "grad_norm": 3.510364055633545, "learning_rate": 6.381306708194622e-06, "loss": 0.8863, "step": 5560 }, { "epoch": 0.4286925686093124, "grad_norm": 3.6670730113983154, "learning_rate": 6.38010680428219e-06, "loss": 1.0318, "step": 5561 }, { "epoch": 0.42876965772432934, "grad_norm": 3.3945372104644775, "learning_rate": 6.378906814327246e-06, "loss": 1.0504, "step": 5562 }, { "epoch": 0.42884674683934626, "grad_norm": 4.040469169616699, "learning_rate": 6.377706738404604e-06, "loss": 1.1041, "step": 5563 }, { "epoch": 0.42892383595436323, "grad_norm": 4.579511642456055, "learning_rate": 6.376506576589082e-06, "loss": 1.0169, "step": 5564 }, { "epoch": 0.4290009250693802, "grad_norm": 3.5444085597991943, "learning_rate": 6.375306328955506e-06, "loss": 1.0349, "step": 5565 }, { "epoch": 0.42907801418439717, "grad_norm": 3.6533939838409424, "learning_rate": 6.374105995578701e-06, "loss": 0.9106, "step": 5566 }, { "epoch": 0.42915510329941414, "grad_norm": 3.527340888977051, "learning_rate": 6.372905576533505e-06, "loss": 0.9116, "step": 5567 }, { "epoch": 0.42923219241443106, "grad_norm": 3.571380853652954, "learning_rate": 6.371705071894756e-06, "loss": 1.0186, "step": 5568 }, { "epoch": 0.42930928152944803, "grad_norm": 3.8140509128570557, "learning_rate": 6.3705044817373006e-06, "loss": 0.9015, "step": 5569 }, { "epoch": 0.429386370644465, "grad_norm": 3.546802520751953, "learning_rate": 6.369303806135989e-06, "loss": 1.0005, "step": 5570 }, { "epoch": 0.42946345975948197, "grad_norm": 3.889112949371338, "learning_rate": 6.368103045165677e-06, "loss": 1.0829, "step": 5571 }, { "epoch": 0.42954054887449894, "grad_norm": 3.435272216796875, "learning_rate": 
6.366902198901225e-06, "loss": 1.0089, "step": 5572 }, { "epoch": 0.42961763798951585, "grad_norm": 3.3787803649902344, "learning_rate": 6.365701267417501e-06, "loss": 0.934, "step": 5573 }, { "epoch": 0.4296947271045328, "grad_norm": 3.8699758052825928, "learning_rate": 6.364500250789375e-06, "loss": 1.1315, "step": 5574 }, { "epoch": 0.4297718162195498, "grad_norm": 3.6572110652923584, "learning_rate": 6.363299149091726e-06, "loss": 1.0157, "step": 5575 }, { "epoch": 0.42984890533456677, "grad_norm": 3.4599862098693848, "learning_rate": 6.362097962399436e-06, "loss": 0.9634, "step": 5576 }, { "epoch": 0.42992599444958374, "grad_norm": 3.603281259536743, "learning_rate": 6.3608966907873925e-06, "loss": 0.9877, "step": 5577 }, { "epoch": 0.43000308356460065, "grad_norm": 3.7701170444488525, "learning_rate": 6.359695334330488e-06, "loss": 1.0674, "step": 5578 }, { "epoch": 0.4300801726796176, "grad_norm": 3.7312378883361816, "learning_rate": 6.358493893103623e-06, "loss": 0.9849, "step": 5579 }, { "epoch": 0.4301572617946346, "grad_norm": 3.3905229568481445, "learning_rate": 6.357292367181698e-06, "loss": 0.8549, "step": 5580 }, { "epoch": 0.43023435090965156, "grad_norm": 3.478235960006714, "learning_rate": 6.356090756639623e-06, "loss": 0.8909, "step": 5581 }, { "epoch": 0.43031144002466853, "grad_norm": 3.4896721839904785, "learning_rate": 6.354889061552314e-06, "loss": 0.9935, "step": 5582 }, { "epoch": 0.43038852913968545, "grad_norm": 3.669250965118408, "learning_rate": 6.353687281994688e-06, "loss": 1.0689, "step": 5583 }, { "epoch": 0.4304656182547024, "grad_norm": 3.8079261779785156, "learning_rate": 6.352485418041673e-06, "loss": 0.9981, "step": 5584 }, { "epoch": 0.4305427073697194, "grad_norm": 3.5822927951812744, "learning_rate": 6.351283469768195e-06, "loss": 1.0457, "step": 5585 }, { "epoch": 0.43061979648473636, "grad_norm": 3.62357759475708, "learning_rate": 6.350081437249191e-06, "loss": 0.9444, "step": 5586 }, { "epoch": 0.43069688559975333, "grad_norm": 3.627352476119995, "learning_rate": 6.348879320559602e-06, "loss": 0.9715, "step": 5587 }, { "epoch": 0.43077397471477025, "grad_norm": 3.5861945152282715, "learning_rate": 6.3476771197743735e-06, "loss": 0.9506, "step": 5588 }, { "epoch": 0.4308510638297872, "grad_norm": 3.5179619789123535, "learning_rate": 6.346474834968458e-06, "loss": 1.0651, "step": 5589 }, { "epoch": 0.4309281529448042, "grad_norm": 4.048654556274414, "learning_rate": 6.345272466216807e-06, "loss": 1.0671, "step": 5590 }, { "epoch": 0.43100524205982116, "grad_norm": 3.6038835048675537, "learning_rate": 6.344070013594388e-06, "loss": 1.0723, "step": 5591 }, { "epoch": 0.43108233117483813, "grad_norm": 3.3801581859588623, "learning_rate": 6.342867477176164e-06, "loss": 0.9918, "step": 5592 }, { "epoch": 0.4311594202898551, "grad_norm": 4.134720325469971, "learning_rate": 6.3416648570371065e-06, "loss": 1.0093, "step": 5593 }, { "epoch": 0.431236509404872, "grad_norm": 3.704859495162964, "learning_rate": 6.340462153252195e-06, "loss": 0.953, "step": 5594 }, { "epoch": 0.431313598519889, "grad_norm": 3.9466912746429443, "learning_rate": 6.339259365896411e-06, "loss": 1.0878, "step": 5595 }, { "epoch": 0.43139068763490596, "grad_norm": 3.7357873916625977, "learning_rate": 6.33805649504474e-06, "loss": 0.9632, "step": 5596 }, { "epoch": 0.4314677767499229, "grad_norm": 3.6938416957855225, "learning_rate": 6.336853540772178e-06, "loss": 0.9337, "step": 5597 }, { "epoch": 0.4315448658649399, "grad_norm": 3.942068099975586, "learning_rate": 
6.335650503153721e-06, "loss": 0.974, "step": 5598 }, { "epoch": 0.4316219549799568, "grad_norm": 4.033871650695801, "learning_rate": 6.334447382264372e-06, "loss": 0.9456, "step": 5599 }, { "epoch": 0.4316990440949738, "grad_norm": 3.7592177391052246, "learning_rate": 6.333244178179141e-06, "loss": 1.0018, "step": 5600 }, { "epoch": 0.43177613320999075, "grad_norm": 3.3277747631073, "learning_rate": 6.3320408909730405e-06, "loss": 0.9049, "step": 5601 }, { "epoch": 0.4318532223250077, "grad_norm": 3.5727672576904297, "learning_rate": 6.330837520721088e-06, "loss": 0.9017, "step": 5602 }, { "epoch": 0.4319303114400247, "grad_norm": 3.610518455505371, "learning_rate": 6.329634067498311e-06, "loss": 1.0287, "step": 5603 }, { "epoch": 0.4320074005550416, "grad_norm": 3.8098137378692627, "learning_rate": 6.3284305313797335e-06, "loss": 0.9755, "step": 5604 }, { "epoch": 0.4320844896700586, "grad_norm": 3.6883254051208496, "learning_rate": 6.327226912440394e-06, "loss": 1.021, "step": 5605 }, { "epoch": 0.43216157878507555, "grad_norm": 3.6646480560302734, "learning_rate": 6.32602321075533e-06, "loss": 0.9877, "step": 5606 }, { "epoch": 0.4322386679000925, "grad_norm": 3.433976173400879, "learning_rate": 6.324819426399587e-06, "loss": 0.9482, "step": 5607 }, { "epoch": 0.4323157570151095, "grad_norm": 3.4387431144714355, "learning_rate": 6.323615559448213e-06, "loss": 0.8817, "step": 5608 }, { "epoch": 0.4323928461301264, "grad_norm": 3.4525887966156006, "learning_rate": 6.322411609976265e-06, "loss": 0.8509, "step": 5609 }, { "epoch": 0.4324699352451434, "grad_norm": 3.7124297618865967, "learning_rate": 6.321207578058803e-06, "loss": 0.962, "step": 5610 }, { "epoch": 0.43254702436016035, "grad_norm": 3.817814350128174, "learning_rate": 6.32000346377089e-06, "loss": 1.051, "step": 5611 }, { "epoch": 0.4326241134751773, "grad_norm": 3.616466760635376, "learning_rate": 6.318799267187596e-06, "loss": 0.9733, "step": 5612 }, { "epoch": 0.4327012025901943, "grad_norm": 3.7230231761932373, "learning_rate": 6.317594988384e-06, "loss": 1.0263, "step": 5613 }, { "epoch": 0.4327782917052112, "grad_norm": 3.632129430770874, "learning_rate": 6.31639062743518e-06, "loss": 0.9545, "step": 5614 }, { "epoch": 0.4328553808202282, "grad_norm": 4.578291416168213, "learning_rate": 6.315186184416222e-06, "loss": 1.0831, "step": 5615 }, { "epoch": 0.43293246993524515, "grad_norm": 3.434826374053955, "learning_rate": 6.313981659402218e-06, "loss": 0.9454, "step": 5616 }, { "epoch": 0.4330095590502621, "grad_norm": 3.2984015941619873, "learning_rate": 6.312777052468262e-06, "loss": 0.9104, "step": 5617 }, { "epoch": 0.4330866481652791, "grad_norm": 3.349336862564087, "learning_rate": 6.3115723636894565e-06, "loss": 1.0299, "step": 5618 }, { "epoch": 0.433163737280296, "grad_norm": 3.6265594959259033, "learning_rate": 6.310367593140906e-06, "loss": 0.9828, "step": 5619 }, { "epoch": 0.433240826395313, "grad_norm": 3.9649040699005127, "learning_rate": 6.3091627408977215e-06, "loss": 0.9659, "step": 5620 }, { "epoch": 0.43331791551032994, "grad_norm": 3.6127185821533203, "learning_rate": 6.3079578070350235e-06, "loss": 0.8698, "step": 5621 }, { "epoch": 0.4333950046253469, "grad_norm": 3.442434310913086, "learning_rate": 6.306752791627928e-06, "loss": 0.8926, "step": 5622 }, { "epoch": 0.4334720937403639, "grad_norm": 3.6911134719848633, "learning_rate": 6.3055476947515635e-06, "loss": 1.0027, "step": 5623 }, { "epoch": 0.4335491828553808, "grad_norm": 3.4410293102264404, "learning_rate": 6.3043425164810635e-06, "loss": 
0.8793, "step": 5624 }, { "epoch": 0.43362627197039777, "grad_norm": 3.6440846920013428, "learning_rate": 6.303137256891563e-06, "loss": 1.027, "step": 5625 }, { "epoch": 0.43370336108541474, "grad_norm": 3.805568218231201, "learning_rate": 6.301931916058201e-06, "loss": 0.9033, "step": 5626 }, { "epoch": 0.4337804502004317, "grad_norm": 3.4418349266052246, "learning_rate": 6.300726494056131e-06, "loss": 0.8662, "step": 5627 }, { "epoch": 0.4338575393154487, "grad_norm": 3.30560302734375, "learning_rate": 6.299520990960497e-06, "loss": 0.959, "step": 5628 }, { "epoch": 0.4339346284304656, "grad_norm": 3.5205271244049072, "learning_rate": 6.298315406846462e-06, "loss": 0.9701, "step": 5629 }, { "epoch": 0.43401171754548257, "grad_norm": 3.5560543537139893, "learning_rate": 6.297109741789184e-06, "loss": 0.8881, "step": 5630 }, { "epoch": 0.43408880666049954, "grad_norm": 3.4408164024353027, "learning_rate": 6.2959039958638325e-06, "loss": 0.9109, "step": 5631 }, { "epoch": 0.4341658957755165, "grad_norm": 3.3632290363311768, "learning_rate": 6.2946981691455775e-06, "loss": 0.8839, "step": 5632 }, { "epoch": 0.4342429848905335, "grad_norm": 3.5588736534118652, "learning_rate": 6.293492261709597e-06, "loss": 0.9453, "step": 5633 }, { "epoch": 0.4343200740055504, "grad_norm": 3.7234859466552734, "learning_rate": 6.292286273631074e-06, "loss": 1.0011, "step": 5634 }, { "epoch": 0.43439716312056736, "grad_norm": 3.504631280899048, "learning_rate": 6.291080204985195e-06, "loss": 0.8339, "step": 5635 }, { "epoch": 0.43447425223558434, "grad_norm": 3.596510171890259, "learning_rate": 6.289874055847152e-06, "loss": 0.8935, "step": 5636 }, { "epoch": 0.4345513413506013, "grad_norm": 3.4602015018463135, "learning_rate": 6.288667826292142e-06, "loss": 0.9409, "step": 5637 }, { "epoch": 0.4346284304656183, "grad_norm": 3.5766189098358154, "learning_rate": 6.2874615163953654e-06, "loss": 0.9501, "step": 5638 }, { "epoch": 0.4347055195806352, "grad_norm": 3.890477180480957, "learning_rate": 6.286255126232032e-06, "loss": 0.8271, "step": 5639 }, { "epoch": 0.43478260869565216, "grad_norm": 3.6183977127075195, "learning_rate": 6.285048655877355e-06, "loss": 0.9268, "step": 5640 }, { "epoch": 0.43485969781066913, "grad_norm": 3.9595730304718018, "learning_rate": 6.283842105406548e-06, "loss": 1.0355, "step": 5641 }, { "epoch": 0.4349367869256861, "grad_norm": 3.801222801208496, "learning_rate": 6.282635474894836e-06, "loss": 0.9763, "step": 5642 }, { "epoch": 0.4350138760407031, "grad_norm": 3.6348776817321777, "learning_rate": 6.281428764417444e-06, "loss": 1.0238, "step": 5643 }, { "epoch": 0.43509096515572, "grad_norm": 3.195066452026367, "learning_rate": 6.280221974049606e-06, "loss": 0.8491, "step": 5644 }, { "epoch": 0.43516805427073696, "grad_norm": 3.7148594856262207, "learning_rate": 6.279015103866559e-06, "loss": 0.871, "step": 5645 }, { "epoch": 0.43524514338575393, "grad_norm": 3.536971092224121, "learning_rate": 6.2778081539435436e-06, "loss": 0.9518, "step": 5646 }, { "epoch": 0.4353222325007709, "grad_norm": 3.973155975341797, "learning_rate": 6.276601124355807e-06, "loss": 1.012, "step": 5647 }, { "epoch": 0.43539932161578787, "grad_norm": 3.6045761108398438, "learning_rate": 6.275394015178603e-06, "loss": 0.983, "step": 5648 }, { "epoch": 0.4354764107308048, "grad_norm": 3.582974433898926, "learning_rate": 6.274186826487187e-06, "loss": 0.9876, "step": 5649 }, { "epoch": 0.43555349984582176, "grad_norm": 3.7312257289886475, "learning_rate": 6.272979558356821e-06, "loss": 1.029, "step": 5650 
}, { "epoch": 0.4356305889608387, "grad_norm": 3.7958641052246094, "learning_rate": 6.271772210862773e-06, "loss": 0.9989, "step": 5651 }, { "epoch": 0.4357076780758557, "grad_norm": 3.4761128425598145, "learning_rate": 6.270564784080314e-06, "loss": 1.0541, "step": 5652 }, { "epoch": 0.43578476719087267, "grad_norm": 3.6425018310546875, "learning_rate": 6.269357278084723e-06, "loss": 1.0119, "step": 5653 }, { "epoch": 0.4358618563058896, "grad_norm": 3.6321396827697754, "learning_rate": 6.268149692951278e-06, "loss": 1.0429, "step": 5654 }, { "epoch": 0.43593894542090655, "grad_norm": 3.9710259437561035, "learning_rate": 6.266942028755269e-06, "loss": 0.9756, "step": 5655 }, { "epoch": 0.4360160345359235, "grad_norm": 3.927583694458008, "learning_rate": 6.265734285571985e-06, "loss": 1.0705, "step": 5656 }, { "epoch": 0.4360931236509405, "grad_norm": 3.5504817962646484, "learning_rate": 6.2645264634767236e-06, "loss": 1.0056, "step": 5657 }, { "epoch": 0.43617021276595747, "grad_norm": 3.7636258602142334, "learning_rate": 6.263318562544787e-06, "loss": 1.011, "step": 5658 }, { "epoch": 0.4362473018809744, "grad_norm": 4.358571529388428, "learning_rate": 6.262110582851481e-06, "loss": 0.9815, "step": 5659 }, { "epoch": 0.43632439099599135, "grad_norm": 3.5723724365234375, "learning_rate": 6.260902524472116e-06, "loss": 0.8575, "step": 5660 }, { "epoch": 0.4364014801110083, "grad_norm": 3.437037229537964, "learning_rate": 6.2596943874820105e-06, "loss": 0.9603, "step": 5661 }, { "epoch": 0.4364785692260253, "grad_norm": 3.732524871826172, "learning_rate": 6.2584861719564825e-06, "loss": 0.8858, "step": 5662 }, { "epoch": 0.43655565834104226, "grad_norm": 3.429790496826172, "learning_rate": 6.25727787797086e-06, "loss": 1.0058, "step": 5663 }, { "epoch": 0.4366327474560592, "grad_norm": 3.6642956733703613, "learning_rate": 6.256069505600474e-06, "loss": 0.9124, "step": 5664 }, { "epoch": 0.43670983657107615, "grad_norm": 3.6058828830718994, "learning_rate": 6.254861054920659e-06, "loss": 0.9339, "step": 5665 }, { "epoch": 0.4367869256860931, "grad_norm": 3.4865617752075195, "learning_rate": 6.2536525260067575e-06, "loss": 0.9652, "step": 5666 }, { "epoch": 0.4368640148011101, "grad_norm": 3.6662580966949463, "learning_rate": 6.252443918934112e-06, "loss": 0.9143, "step": 5667 }, { "epoch": 0.43694110391612706, "grad_norm": 4.207685470581055, "learning_rate": 6.251235233778075e-06, "loss": 0.9565, "step": 5668 }, { "epoch": 0.437018193031144, "grad_norm": 3.748297929763794, "learning_rate": 6.250026470614003e-06, "loss": 1.1053, "step": 5669 }, { "epoch": 0.43709528214616095, "grad_norm": 3.7725093364715576, "learning_rate": 6.248817629517253e-06, "loss": 0.9306, "step": 5670 }, { "epoch": 0.4371723712611779, "grad_norm": 3.868713617324829, "learning_rate": 6.247608710563192e-06, "loss": 0.9549, "step": 5671 }, { "epoch": 0.4372494603761949, "grad_norm": 3.3361618518829346, "learning_rate": 6.2463997138271905e-06, "loss": 0.9246, "step": 5672 }, { "epoch": 0.43732654949121186, "grad_norm": 3.693483591079712, "learning_rate": 6.24519063938462e-06, "loss": 0.9676, "step": 5673 }, { "epoch": 0.4374036386062288, "grad_norm": 3.5362865924835205, "learning_rate": 6.243981487310864e-06, "loss": 1.0187, "step": 5674 }, { "epoch": 0.43748072772124574, "grad_norm": 3.2017054557800293, "learning_rate": 6.242772257681304e-06, "loss": 0.9676, "step": 5675 }, { "epoch": 0.4375578168362627, "grad_norm": 3.7719571590423584, "learning_rate": 6.241562950571331e-06, "loss": 0.9562, "step": 5676 }, { "epoch": 
0.4376349059512797, "grad_norm": 3.446551561355591, "learning_rate": 6.240353566056339e-06, "loss": 0.9793, "step": 5677 }, { "epoch": 0.43771199506629666, "grad_norm": 4.14500617980957, "learning_rate": 6.239144104211724e-06, "loss": 1.0242, "step": 5678 }, { "epoch": 0.4377890841813136, "grad_norm": 4.008180141448975, "learning_rate": 6.237934565112893e-06, "loss": 1.0776, "step": 5679 }, { "epoch": 0.43786617329633054, "grad_norm": 3.641493082046509, "learning_rate": 6.2367249488352535e-06, "loss": 1.0238, "step": 5680 }, { "epoch": 0.4379432624113475, "grad_norm": 3.8391215801239014, "learning_rate": 6.235515255454218e-06, "loss": 1.0155, "step": 5681 }, { "epoch": 0.4380203515263645, "grad_norm": 3.3518407344818115, "learning_rate": 6.234305485045205e-06, "loss": 0.8861, "step": 5682 }, { "epoch": 0.43809744064138145, "grad_norm": 3.9264771938323975, "learning_rate": 6.2330956376836384e-06, "loss": 1.0198, "step": 5683 }, { "epoch": 0.4381745297563984, "grad_norm": 3.6416938304901123, "learning_rate": 6.231885713444944e-06, "loss": 0.9194, "step": 5684 }, { "epoch": 0.43825161887141534, "grad_norm": 3.5663013458251953, "learning_rate": 6.230675712404557e-06, "loss": 0.8705, "step": 5685 }, { "epoch": 0.4383287079864323, "grad_norm": 3.5762104988098145, "learning_rate": 6.229465634637912e-06, "loss": 1.0757, "step": 5686 }, { "epoch": 0.4384057971014493, "grad_norm": 4.03668212890625, "learning_rate": 6.2282554802204535e-06, "loss": 0.9993, "step": 5687 }, { "epoch": 0.43848288621646625, "grad_norm": 3.1501452922821045, "learning_rate": 6.2270452492276265e-06, "loss": 0.8529, "step": 5688 }, { "epoch": 0.4385599753314832, "grad_norm": 4.269925117492676, "learning_rate": 6.225834941734883e-06, "loss": 0.9343, "step": 5689 }, { "epoch": 0.43863706444650014, "grad_norm": 3.7110297679901123, "learning_rate": 6.22462455781768e-06, "loss": 0.9587, "step": 5690 }, { "epoch": 0.4387141535615171, "grad_norm": 3.938750743865967, "learning_rate": 6.223414097551478e-06, "loss": 0.979, "step": 5691 }, { "epoch": 0.4387912426765341, "grad_norm": 3.5555200576782227, "learning_rate": 6.2222035610117424e-06, "loss": 0.9671, "step": 5692 }, { "epoch": 0.43886833179155105, "grad_norm": 3.7331223487854004, "learning_rate": 6.220992948273947e-06, "loss": 0.8927, "step": 5693 }, { "epoch": 0.438945420906568, "grad_norm": 3.361193895339966, "learning_rate": 6.219782259413562e-06, "loss": 0.8326, "step": 5694 }, { "epoch": 0.43902251002158493, "grad_norm": 3.461210250854492, "learning_rate": 6.218571494506073e-06, "loss": 0.8966, "step": 5695 }, { "epoch": 0.4390995991366019, "grad_norm": 3.2159628868103027, "learning_rate": 6.2173606536269605e-06, "loss": 0.8993, "step": 5696 }, { "epoch": 0.4391766882516189, "grad_norm": 3.616848945617676, "learning_rate": 6.2161497368517175e-06, "loss": 0.9901, "step": 5697 }, { "epoch": 0.43925377736663584, "grad_norm": 3.6139261722564697, "learning_rate": 6.214938744255837e-06, "loss": 0.8664, "step": 5698 }, { "epoch": 0.4393308664816528, "grad_norm": 3.526050567626953, "learning_rate": 6.213727675914818e-06, "loss": 1.0925, "step": 5699 }, { "epoch": 0.43940795559666973, "grad_norm": 3.36438250541687, "learning_rate": 6.212516531904164e-06, "loss": 0.9525, "step": 5700 }, { "epoch": 0.4394850447116867, "grad_norm": 3.640000343322754, "learning_rate": 6.2113053122993846e-06, "loss": 1.0309, "step": 5701 }, { "epoch": 0.43956213382670367, "grad_norm": 3.7838895320892334, "learning_rate": 6.210094017175991e-06, "loss": 0.9115, "step": 5702 }, { "epoch": 
0.43963922294172064, "grad_norm": 3.5852866172790527, "learning_rate": 6.208882646609505e-06, "loss": 0.918, "step": 5703 }, { "epoch": 0.4397163120567376, "grad_norm": 3.5891425609588623, "learning_rate": 6.207671200675446e-06, "loss": 0.961, "step": 5704 }, { "epoch": 0.4397934011717545, "grad_norm": 3.654510974884033, "learning_rate": 6.206459679449341e-06, "loss": 0.9256, "step": 5705 }, { "epoch": 0.4398704902867715, "grad_norm": 3.4456582069396973, "learning_rate": 6.205248083006725e-06, "loss": 0.9671, "step": 5706 }, { "epoch": 0.43994757940178847, "grad_norm": 3.725618362426758, "learning_rate": 6.204036411423133e-06, "loss": 1.0621, "step": 5707 }, { "epoch": 0.44002466851680544, "grad_norm": 3.431455135345459, "learning_rate": 6.202824664774107e-06, "loss": 0.9588, "step": 5708 }, { "epoch": 0.4401017576318224, "grad_norm": 3.980421543121338, "learning_rate": 6.201612843135192e-06, "loss": 1.0372, "step": 5709 }, { "epoch": 0.4401788467468393, "grad_norm": 3.611001491546631, "learning_rate": 6.200400946581939e-06, "loss": 0.9616, "step": 5710 }, { "epoch": 0.4402559358618563, "grad_norm": 3.704310417175293, "learning_rate": 6.199188975189905e-06, "loss": 1.0496, "step": 5711 }, { "epoch": 0.44033302497687327, "grad_norm": 3.831941843032837, "learning_rate": 6.197976929034649e-06, "loss": 0.9209, "step": 5712 }, { "epoch": 0.44041011409189024, "grad_norm": 3.790203094482422, "learning_rate": 6.196764808191735e-06, "loss": 0.9974, "step": 5713 }, { "epoch": 0.4404872032069072, "grad_norm": 3.4942948818206787, "learning_rate": 6.195552612736735e-06, "loss": 1.0154, "step": 5714 }, { "epoch": 0.4405642923219241, "grad_norm": 3.5759153366088867, "learning_rate": 6.19434034274522e-06, "loss": 0.9808, "step": 5715 }, { "epoch": 0.4406413814369411, "grad_norm": 3.54484486579895, "learning_rate": 6.19312799829277e-06, "loss": 0.8969, "step": 5716 }, { "epoch": 0.44071847055195806, "grad_norm": 3.6887714862823486, "learning_rate": 6.191915579454971e-06, "loss": 0.9693, "step": 5717 }, { "epoch": 0.44079555966697503, "grad_norm": 3.173921585083008, "learning_rate": 6.1907030863074055e-06, "loss": 0.9008, "step": 5718 }, { "epoch": 0.440872648781992, "grad_norm": 3.73128080368042, "learning_rate": 6.189490518925673e-06, "loss": 1.0533, "step": 5719 }, { "epoch": 0.4409497378970089, "grad_norm": 3.7085869312286377, "learning_rate": 6.188277877385365e-06, "loss": 0.9966, "step": 5720 }, { "epoch": 0.4410268270120259, "grad_norm": 3.4095280170440674, "learning_rate": 6.187065161762085e-06, "loss": 0.9337, "step": 5721 }, { "epoch": 0.44110391612704286, "grad_norm": 3.37434720993042, "learning_rate": 6.1858523721314425e-06, "loss": 0.9576, "step": 5722 }, { "epoch": 0.44118100524205983, "grad_norm": 3.524092674255371, "learning_rate": 6.184639508569043e-06, "loss": 0.943, "step": 5723 }, { "epoch": 0.4412580943570768, "grad_norm": 3.7031326293945312, "learning_rate": 6.183426571150508e-06, "loss": 0.941, "step": 5724 }, { "epoch": 0.4413351834720937, "grad_norm": 3.827249050140381, "learning_rate": 6.182213559951456e-06, "loss": 0.9173, "step": 5725 }, { "epoch": 0.4414122725871107, "grad_norm": 3.6542346477508545, "learning_rate": 6.181000475047509e-06, "loss": 0.9171, "step": 5726 }, { "epoch": 0.44148936170212766, "grad_norm": 3.8441221714019775, "learning_rate": 6.1797873165143005e-06, "loss": 1.0174, "step": 5727 }, { "epoch": 0.44156645081714463, "grad_norm": 3.4966237545013428, "learning_rate": 6.178574084427464e-06, "loss": 0.9042, "step": 5728 }, { "epoch": 0.4416435399321616, 
"grad_norm": 3.398782968521118, "learning_rate": 6.1773607788626355e-06, "loss": 0.9617, "step": 5729 }, { "epoch": 0.4417206290471785, "grad_norm": 3.6098642349243164, "learning_rate": 6.176147399895461e-06, "loss": 0.9554, "step": 5730 }, { "epoch": 0.4417977181621955, "grad_norm": 4.328033924102783, "learning_rate": 6.174933947601587e-06, "loss": 1.0728, "step": 5731 }, { "epoch": 0.44187480727721246, "grad_norm": 3.6380655765533447, "learning_rate": 6.173720422056666e-06, "loss": 0.9786, "step": 5732 }, { "epoch": 0.4419518963922294, "grad_norm": 3.8986289501190186, "learning_rate": 6.172506823336357e-06, "loss": 0.9966, "step": 5733 }, { "epoch": 0.4420289855072464, "grad_norm": 3.5367014408111572, "learning_rate": 6.17129315151632e-06, "loss": 0.9705, "step": 5734 }, { "epoch": 0.4421060746222633, "grad_norm": 3.6071369647979736, "learning_rate": 6.17007940667222e-06, "loss": 0.8371, "step": 5735 }, { "epoch": 0.4421831637372803, "grad_norm": 3.784031867980957, "learning_rate": 6.16886558887973e-06, "loss": 0.9845, "step": 5736 }, { "epoch": 0.44226025285229725, "grad_norm": 3.7865121364593506, "learning_rate": 6.167651698214524e-06, "loss": 0.9009, "step": 5737 }, { "epoch": 0.4423373419673142, "grad_norm": 3.29433274269104, "learning_rate": 6.166437734752282e-06, "loss": 0.8721, "step": 5738 }, { "epoch": 0.4424144310823312, "grad_norm": 3.7611396312713623, "learning_rate": 6.165223698568689e-06, "loss": 1.0397, "step": 5739 }, { "epoch": 0.4424915201973481, "grad_norm": 3.6707139015197754, "learning_rate": 6.164009589739431e-06, "loss": 0.9511, "step": 5740 }, { "epoch": 0.4425686093123651, "grad_norm": 3.516786575317383, "learning_rate": 6.162795408340206e-06, "loss": 0.9583, "step": 5741 }, { "epoch": 0.44264569842738205, "grad_norm": 3.280879259109497, "learning_rate": 6.161581154446709e-06, "loss": 0.937, "step": 5742 }, { "epoch": 0.442722787542399, "grad_norm": 3.570941209793091, "learning_rate": 6.1603668281346425e-06, "loss": 1.048, "step": 5743 }, { "epoch": 0.442799876657416, "grad_norm": 3.6662280559539795, "learning_rate": 6.159152429479714e-06, "loss": 0.9452, "step": 5744 }, { "epoch": 0.4428769657724329, "grad_norm": 3.7819111347198486, "learning_rate": 6.157937958557635e-06, "loss": 1.0653, "step": 5745 }, { "epoch": 0.4429540548874499, "grad_norm": 3.768911361694336, "learning_rate": 6.156723415444123e-06, "loss": 0.8976, "step": 5746 }, { "epoch": 0.44303114400246685, "grad_norm": 3.1364481449127197, "learning_rate": 6.155508800214894e-06, "loss": 0.9241, "step": 5747 }, { "epoch": 0.4431082331174838, "grad_norm": 4.069226264953613, "learning_rate": 6.154294112945678e-06, "loss": 1.0962, "step": 5748 }, { "epoch": 0.4431853222325008, "grad_norm": 3.394063711166382, "learning_rate": 6.153079353712201e-06, "loss": 0.8823, "step": 5749 }, { "epoch": 0.4432624113475177, "grad_norm": 3.523238182067871, "learning_rate": 6.1518645225902e-06, "loss": 1.007, "step": 5750 }, { "epoch": 0.4433395004625347, "grad_norm": 3.942383050918579, "learning_rate": 6.150649619655411e-06, "loss": 0.9954, "step": 5751 }, { "epoch": 0.44341658957755165, "grad_norm": 3.7958526611328125, "learning_rate": 6.149434644983576e-06, "loss": 0.9991, "step": 5752 }, { "epoch": 0.4434936786925686, "grad_norm": 3.4549882411956787, "learning_rate": 6.148219598650444e-06, "loss": 0.9293, "step": 5753 }, { "epoch": 0.4435707678075856, "grad_norm": 4.054780960083008, "learning_rate": 6.1470044807317695e-06, "loss": 0.8547, "step": 5754 }, { "epoch": 0.4436478569226025, "grad_norm": 3.937290906906128, 
"learning_rate": 6.145789291303305e-06, "loss": 0.9548, "step": 5755 }, { "epoch": 0.44372494603761947, "grad_norm": 3.627803325653076, "learning_rate": 6.144574030440811e-06, "loss": 0.9164, "step": 5756 }, { "epoch": 0.44380203515263644, "grad_norm": 3.1679458618164062, "learning_rate": 6.143358698220055e-06, "loss": 0.9428, "step": 5757 }, { "epoch": 0.4438791242676534, "grad_norm": 3.7125744819641113, "learning_rate": 6.142143294716806e-06, "loss": 0.91, "step": 5758 }, { "epoch": 0.4439562133826704, "grad_norm": 4.144897937774658, "learning_rate": 6.140927820006838e-06, "loss": 1.0763, "step": 5759 }, { "epoch": 0.4440333024976873, "grad_norm": 3.8823111057281494, "learning_rate": 6.139712274165929e-06, "loss": 0.9906, "step": 5760 }, { "epoch": 0.44411039161270427, "grad_norm": 3.5604453086853027, "learning_rate": 6.138496657269862e-06, "loss": 0.8591, "step": 5761 }, { "epoch": 0.44418748072772124, "grad_norm": 3.5651559829711914, "learning_rate": 6.1372809693944255e-06, "loss": 1.0111, "step": 5762 }, { "epoch": 0.4442645698427382, "grad_norm": 3.4907076358795166, "learning_rate": 6.1360652106154095e-06, "loss": 0.9794, "step": 5763 }, { "epoch": 0.4443416589577552, "grad_norm": 3.5271456241607666, "learning_rate": 6.134849381008613e-06, "loss": 1.0717, "step": 5764 }, { "epoch": 0.44441874807277215, "grad_norm": 3.589846611022949, "learning_rate": 6.1336334806498356e-06, "loss": 1.0364, "step": 5765 }, { "epoch": 0.44449583718778907, "grad_norm": 3.321850299835205, "learning_rate": 6.13241750961488e-06, "loss": 0.9513, "step": 5766 }, { "epoch": 0.44457292630280604, "grad_norm": 3.9627490043640137, "learning_rate": 6.13120146797956e-06, "loss": 0.9762, "step": 5767 }, { "epoch": 0.444650015417823, "grad_norm": 3.2399096488952637, "learning_rate": 6.129985355819684e-06, "loss": 0.9448, "step": 5768 }, { "epoch": 0.44472710453284, "grad_norm": 3.653228521347046, "learning_rate": 6.128769173211075e-06, "loss": 0.9295, "step": 5769 }, { "epoch": 0.44480419364785695, "grad_norm": 3.379409074783325, "learning_rate": 6.127552920229556e-06, "loss": 0.8892, "step": 5770 }, { "epoch": 0.44488128276287386, "grad_norm": 3.461467981338501, "learning_rate": 6.126336596950949e-06, "loss": 0.9007, "step": 5771 }, { "epoch": 0.44495837187789083, "grad_norm": 3.425234794616699, "learning_rate": 6.1251202034510905e-06, "loss": 0.9136, "step": 5772 }, { "epoch": 0.4450354609929078, "grad_norm": 3.859802007675171, "learning_rate": 6.123903739805815e-06, "loss": 0.9927, "step": 5773 }, { "epoch": 0.4451125501079248, "grad_norm": 3.9901375770568848, "learning_rate": 6.1226872060909606e-06, "loss": 1.0381, "step": 5774 }, { "epoch": 0.44518963922294175, "grad_norm": 3.5192387104034424, "learning_rate": 6.121470602382375e-06, "loss": 0.9129, "step": 5775 }, { "epoch": 0.44526672833795866, "grad_norm": 3.422767400741577, "learning_rate": 6.1202539287559035e-06, "loss": 0.9601, "step": 5776 }, { "epoch": 0.44534381745297563, "grad_norm": 3.6714818477630615, "learning_rate": 6.119037185287402e-06, "loss": 0.9204, "step": 5777 }, { "epoch": 0.4454209065679926, "grad_norm": 3.7968335151672363, "learning_rate": 6.1178203720527285e-06, "loss": 0.9916, "step": 5778 }, { "epoch": 0.4454979956830096, "grad_norm": 3.772531747817993, "learning_rate": 6.116603489127744e-06, "loss": 1.0515, "step": 5779 }, { "epoch": 0.44557508479802654, "grad_norm": 3.4873077869415283, "learning_rate": 6.1153865365883146e-06, "loss": 0.9606, "step": 5780 }, { "epoch": 0.44565217391304346, "grad_norm": 3.7624447345733643, 
"learning_rate": 6.114169514510312e-06, "loss": 0.9, "step": 5781 }, { "epoch": 0.44572926302806043, "grad_norm": 3.8201286792755127, "learning_rate": 6.112952422969611e-06, "loss": 0.9601, "step": 5782 }, { "epoch": 0.4458063521430774, "grad_norm": 4.063814640045166, "learning_rate": 6.111735262042088e-06, "loss": 0.9778, "step": 5783 }, { "epoch": 0.44588344125809437, "grad_norm": 3.7615840435028076, "learning_rate": 6.11051803180363e-06, "loss": 0.9212, "step": 5784 }, { "epoch": 0.44596053037311134, "grad_norm": 3.5406413078308105, "learning_rate": 6.109300732330126e-06, "loss": 0.9414, "step": 5785 }, { "epoch": 0.44603761948812826, "grad_norm": 3.9752652645111084, "learning_rate": 6.1080833636974655e-06, "loss": 1.0694, "step": 5786 }, { "epoch": 0.4461147086031452, "grad_norm": 3.561919689178467, "learning_rate": 6.106865925981547e-06, "loss": 0.9985, "step": 5787 }, { "epoch": 0.4461917977181622, "grad_norm": 3.6946768760681152, "learning_rate": 6.105648419258271e-06, "loss": 1.0049, "step": 5788 }, { "epoch": 0.44626888683317917, "grad_norm": 3.748198986053467, "learning_rate": 6.10443084360354e-06, "loss": 0.9642, "step": 5789 }, { "epoch": 0.44634597594819614, "grad_norm": 3.938978672027588, "learning_rate": 6.103213199093267e-06, "loss": 1.0989, "step": 5790 }, { "epoch": 0.44642306506321305, "grad_norm": 3.319303035736084, "learning_rate": 6.101995485803367e-06, "loss": 1.0071, "step": 5791 }, { "epoch": 0.44650015417823, "grad_norm": 3.9416778087615967, "learning_rate": 6.100777703809753e-06, "loss": 1.038, "step": 5792 }, { "epoch": 0.446577243293247, "grad_norm": 3.5364232063293457, "learning_rate": 6.0995598531883504e-06, "loss": 0.862, "step": 5793 }, { "epoch": 0.44665433240826397, "grad_norm": 3.440767526626587, "learning_rate": 6.098341934015088e-06, "loss": 0.9708, "step": 5794 }, { "epoch": 0.44673142152328094, "grad_norm": 3.8628475666046143, "learning_rate": 6.097123946365893e-06, "loss": 0.936, "step": 5795 }, { "epoch": 0.44680851063829785, "grad_norm": 3.6398117542266846, "learning_rate": 6.095905890316701e-06, "loss": 1.0004, "step": 5796 }, { "epoch": 0.4468855997533148, "grad_norm": 4.144468307495117, "learning_rate": 6.094687765943455e-06, "loss": 1.0691, "step": 5797 }, { "epoch": 0.4469626888683318, "grad_norm": 3.321753740310669, "learning_rate": 6.093469573322094e-06, "loss": 1.0334, "step": 5798 }, { "epoch": 0.44703977798334876, "grad_norm": 3.512993574142456, "learning_rate": 6.09225131252857e-06, "loss": 1.0446, "step": 5799 }, { "epoch": 0.44711686709836573, "grad_norm": 3.447844982147217, "learning_rate": 6.091032983638833e-06, "loss": 0.9838, "step": 5800 }, { "epoch": 0.44719395621338265, "grad_norm": 3.6095683574676514, "learning_rate": 6.0898145867288395e-06, "loss": 0.9409, "step": 5801 }, { "epoch": 0.4472710453283996, "grad_norm": 3.7775659561157227, "learning_rate": 6.088596121874552e-06, "loss": 1.0464, "step": 5802 }, { "epoch": 0.4473481344434166, "grad_norm": 3.305006980895996, "learning_rate": 6.087377589151933e-06, "loss": 0.9671, "step": 5803 }, { "epoch": 0.44742522355843356, "grad_norm": 3.450387477874756, "learning_rate": 6.086158988636953e-06, "loss": 0.9715, "step": 5804 }, { "epoch": 0.44750231267345053, "grad_norm": 3.848440647125244, "learning_rate": 6.084940320405587e-06, "loss": 1.0587, "step": 5805 }, { "epoch": 0.44757940178846745, "grad_norm": 3.4928512573242188, "learning_rate": 6.08372158453381e-06, "loss": 0.922, "step": 5806 }, { "epoch": 0.4476564909034844, "grad_norm": 3.3881824016571045, "learning_rate": 
6.082502781097603e-06, "loss": 0.9111, "step": 5807 }, { "epoch": 0.4477335800185014, "grad_norm": 3.2955267429351807, "learning_rate": 6.081283910172956e-06, "loss": 0.9145, "step": 5808 }, { "epoch": 0.44781066913351836, "grad_norm": 3.5794951915740967, "learning_rate": 6.080064971835857e-06, "loss": 0.9883, "step": 5809 }, { "epoch": 0.44788775824853533, "grad_norm": 3.3705379962921143, "learning_rate": 6.078845966162302e-06, "loss": 0.9652, "step": 5810 }, { "epoch": 0.44796484736355224, "grad_norm": 3.782891273498535, "learning_rate": 6.077626893228288e-06, "loss": 1.0398, "step": 5811 }, { "epoch": 0.4480419364785692, "grad_norm": 3.3794403076171875, "learning_rate": 6.076407753109818e-06, "loss": 0.9309, "step": 5812 }, { "epoch": 0.4481190255935862, "grad_norm": 3.325587272644043, "learning_rate": 6.0751885458829e-06, "loss": 0.9469, "step": 5813 }, { "epoch": 0.44819611470860315, "grad_norm": 3.5415618419647217, "learning_rate": 6.073969271623543e-06, "loss": 0.9648, "step": 5814 }, { "epoch": 0.4482732038236201, "grad_norm": 3.56663179397583, "learning_rate": 6.072749930407767e-06, "loss": 1.0515, "step": 5815 }, { "epoch": 0.44835029293863704, "grad_norm": 4.031259536743164, "learning_rate": 6.071530522311586e-06, "loss": 1.0355, "step": 5816 }, { "epoch": 0.448427382053654, "grad_norm": 3.370077610015869, "learning_rate": 6.070311047411027e-06, "loss": 0.8723, "step": 5817 }, { "epoch": 0.448504471168671, "grad_norm": 3.9571077823638916, "learning_rate": 6.069091505782119e-06, "loss": 0.9908, "step": 5818 }, { "epoch": 0.44858156028368795, "grad_norm": 3.552165985107422, "learning_rate": 6.067871897500891e-06, "loss": 0.9056, "step": 5819 }, { "epoch": 0.4486586493987049, "grad_norm": 3.8415639400482178, "learning_rate": 6.066652222643381e-06, "loss": 1.0353, "step": 5820 }, { "epoch": 0.44873573851372184, "grad_norm": 3.4832634925842285, "learning_rate": 6.0654324812856305e-06, "loss": 0.9881, "step": 5821 }, { "epoch": 0.4488128276287388, "grad_norm": 3.5420501232147217, "learning_rate": 6.064212673503682e-06, "loss": 0.9398, "step": 5822 }, { "epoch": 0.4488899167437558, "grad_norm": 3.6258022785186768, "learning_rate": 6.062992799373585e-06, "loss": 1.0994, "step": 5823 }, { "epoch": 0.44896700585877275, "grad_norm": 3.758962631225586, "learning_rate": 6.061772858971392e-06, "loss": 0.9196, "step": 5824 }, { "epoch": 0.4490440949737897, "grad_norm": 3.7305667400360107, "learning_rate": 6.060552852373161e-06, "loss": 0.9636, "step": 5825 }, { "epoch": 0.44912118408880664, "grad_norm": 3.6690361499786377, "learning_rate": 6.059332779654953e-06, "loss": 1.0034, "step": 5826 }, { "epoch": 0.4491982732038236, "grad_norm": 3.7183585166931152, "learning_rate": 6.05811264089283e-06, "loss": 1.0183, "step": 5827 }, { "epoch": 0.4492753623188406, "grad_norm": 3.5112946033477783, "learning_rate": 6.056892436162866e-06, "loss": 0.9799, "step": 5828 }, { "epoch": 0.44935245143385755, "grad_norm": 3.783031940460205, "learning_rate": 6.055672165541132e-06, "loss": 0.9735, "step": 5829 }, { "epoch": 0.4494295405488745, "grad_norm": 4.117906093597412, "learning_rate": 6.0544518291037055e-06, "loss": 1.0281, "step": 5830 }, { "epoch": 0.44950662966389143, "grad_norm": 3.5739166736602783, "learning_rate": 6.05323142692667e-06, "loss": 1.0257, "step": 5831 }, { "epoch": 0.4495837187789084, "grad_norm": 3.51526141166687, "learning_rate": 6.052010959086109e-06, "loss": 0.9378, "step": 5832 }, { "epoch": 0.4496608078939254, "grad_norm": 4.066465377807617, "learning_rate": 6.050790425658112e-06, 
"loss": 1.0126, "step": 5833 }, { "epoch": 0.44973789700894234, "grad_norm": 3.460562229156494, "learning_rate": 6.049569826718776e-06, "loss": 0.9873, "step": 5834 }, { "epoch": 0.4498149861239593, "grad_norm": 3.6307222843170166, "learning_rate": 6.048349162344196e-06, "loss": 0.9094, "step": 5835 }, { "epoch": 0.44989207523897623, "grad_norm": 4.065826892852783, "learning_rate": 6.047128432610476e-06, "loss": 0.8306, "step": 5836 }, { "epoch": 0.4499691643539932, "grad_norm": 3.523407459259033, "learning_rate": 6.045907637593722e-06, "loss": 0.9603, "step": 5837 }, { "epoch": 0.45004625346901017, "grad_norm": 3.768115997314453, "learning_rate": 6.044686777370042e-06, "loss": 1.0456, "step": 5838 }, { "epoch": 0.45012334258402714, "grad_norm": 3.527888059616089, "learning_rate": 6.043465852015553e-06, "loss": 1.0582, "step": 5839 }, { "epoch": 0.4502004316990441, "grad_norm": 3.7846176624298096, "learning_rate": 6.042244861606373e-06, "loss": 1.0712, "step": 5840 }, { "epoch": 0.450277520814061, "grad_norm": 3.638317584991455, "learning_rate": 6.041023806218622e-06, "loss": 0.9448, "step": 5841 }, { "epoch": 0.450354609929078, "grad_norm": 3.303644895553589, "learning_rate": 6.03980268592843e-06, "loss": 0.914, "step": 5842 }, { "epoch": 0.45043169904409497, "grad_norm": 3.3192503452301025, "learning_rate": 6.0385815008119254e-06, "loss": 0.9118, "step": 5843 }, { "epoch": 0.45050878815911194, "grad_norm": 3.531160831451416, "learning_rate": 6.037360250945243e-06, "loss": 0.966, "step": 5844 }, { "epoch": 0.4505858772741289, "grad_norm": 3.8585782051086426, "learning_rate": 6.036138936404521e-06, "loss": 0.9231, "step": 5845 }, { "epoch": 0.4506629663891459, "grad_norm": 3.3526854515075684, "learning_rate": 6.034917557265903e-06, "loss": 0.9663, "step": 5846 }, { "epoch": 0.4507400555041628, "grad_norm": 3.64371919631958, "learning_rate": 6.033696113605536e-06, "loss": 0.9702, "step": 5847 }, { "epoch": 0.45081714461917977, "grad_norm": 3.5181994438171387, "learning_rate": 6.0324746054995685e-06, "loss": 0.9026, "step": 5848 }, { "epoch": 0.45089423373419674, "grad_norm": 3.487680196762085, "learning_rate": 6.031253033024158e-06, "loss": 0.9488, "step": 5849 }, { "epoch": 0.4509713228492137, "grad_norm": 3.600533962249756, "learning_rate": 6.030031396255462e-06, "loss": 1.0593, "step": 5850 }, { "epoch": 0.4510484119642307, "grad_norm": 3.4745328426361084, "learning_rate": 6.028809695269641e-06, "loss": 0.937, "step": 5851 }, { "epoch": 0.4511255010792476, "grad_norm": 3.617902994155884, "learning_rate": 6.027587930142866e-06, "loss": 0.9552, "step": 5852 }, { "epoch": 0.45120259019426456, "grad_norm": 3.4752037525177, "learning_rate": 6.026366100951304e-06, "loss": 0.9546, "step": 5853 }, { "epoch": 0.45127967930928153, "grad_norm": 3.3312973976135254, "learning_rate": 6.025144207771132e-06, "loss": 0.9142, "step": 5854 }, { "epoch": 0.4513567684242985, "grad_norm": 4.001164436340332, "learning_rate": 6.0239222506785285e-06, "loss": 1.0792, "step": 5855 }, { "epoch": 0.4514338575393155, "grad_norm": 3.3809666633605957, "learning_rate": 6.0227002297496765e-06, "loss": 0.9077, "step": 5856 }, { "epoch": 0.4515109466543324, "grad_norm": 3.638946771621704, "learning_rate": 6.02147814506076e-06, "loss": 0.9814, "step": 5857 }, { "epoch": 0.45158803576934936, "grad_norm": 3.711111545562744, "learning_rate": 6.020255996687973e-06, "loss": 0.9074, "step": 5858 }, { "epoch": 0.45166512488436633, "grad_norm": 3.797400712966919, "learning_rate": 6.019033784707507e-06, "loss": 1.0543, "step": 5859 
}, { "epoch": 0.4517422139993833, "grad_norm": 3.4948596954345703, "learning_rate": 6.017811509195565e-06, "loss": 0.9662, "step": 5860 }, { "epoch": 0.4518193031144003, "grad_norm": 3.3635761737823486, "learning_rate": 6.0165891702283444e-06, "loss": 0.8524, "step": 5861 }, { "epoch": 0.4518963922294172, "grad_norm": 3.4985485076904297, "learning_rate": 6.015366767882054e-06, "loss": 0.9585, "step": 5862 }, { "epoch": 0.45197348134443416, "grad_norm": 3.706798791885376, "learning_rate": 6.014144302232906e-06, "loss": 1.0279, "step": 5863 }, { "epoch": 0.45205057045945113, "grad_norm": 3.334749460220337, "learning_rate": 6.012921773357112e-06, "loss": 1.0438, "step": 5864 }, { "epoch": 0.4521276595744681, "grad_norm": 4.0636091232299805, "learning_rate": 6.011699181330891e-06, "loss": 1.0819, "step": 5865 }, { "epoch": 0.45220474868948507, "grad_norm": 3.427274703979492, "learning_rate": 6.0104765262304676e-06, "loss": 0.9706, "step": 5866 }, { "epoch": 0.452281837804502, "grad_norm": 3.34578800201416, "learning_rate": 6.009253808132064e-06, "loss": 0.8081, "step": 5867 }, { "epoch": 0.45235892691951896, "grad_norm": 4.062625408172607, "learning_rate": 6.008031027111913e-06, "loss": 1.0353, "step": 5868 }, { "epoch": 0.4524360160345359, "grad_norm": 3.454693078994751, "learning_rate": 6.00680818324625e-06, "loss": 0.9642, "step": 5869 }, { "epoch": 0.4525131051495529, "grad_norm": 3.7603514194488525, "learning_rate": 6.00558527661131e-06, "loss": 0.9058, "step": 5870 }, { "epoch": 0.45259019426456987, "grad_norm": 3.822753667831421, "learning_rate": 6.004362307283335e-06, "loss": 1.0457, "step": 5871 }, { "epoch": 0.4526672833795868, "grad_norm": 3.4553167819976807, "learning_rate": 6.003139275338573e-06, "loss": 0.9248, "step": 5872 }, { "epoch": 0.45274437249460375, "grad_norm": 3.557408332824707, "learning_rate": 6.001916180853271e-06, "loss": 1.0115, "step": 5873 }, { "epoch": 0.4528214616096207, "grad_norm": 3.668583393096924, "learning_rate": 6.0006930239036865e-06, "loss": 1.0528, "step": 5874 }, { "epoch": 0.4528985507246377, "grad_norm": 3.564915418624878, "learning_rate": 5.999469804566074e-06, "loss": 0.986, "step": 5875 }, { "epoch": 0.45297563983965466, "grad_norm": 3.27848744392395, "learning_rate": 5.998246522916695e-06, "loss": 0.8666, "step": 5876 }, { "epoch": 0.4530527289546716, "grad_norm": 3.95578932762146, "learning_rate": 5.997023179031815e-06, "loss": 1.0293, "step": 5877 }, { "epoch": 0.45312981806968855, "grad_norm": 3.2330212593078613, "learning_rate": 5.995799772987705e-06, "loss": 0.8926, "step": 5878 }, { "epoch": 0.4532069071847055, "grad_norm": 3.7614970207214355, "learning_rate": 5.994576304860636e-06, "loss": 0.9709, "step": 5879 }, { "epoch": 0.4532839962997225, "grad_norm": 3.9075229167938232, "learning_rate": 5.993352774726885e-06, "loss": 0.9729, "step": 5880 }, { "epoch": 0.45336108541473946, "grad_norm": 3.5691709518432617, "learning_rate": 5.992129182662733e-06, "loss": 0.9145, "step": 5881 }, { "epoch": 0.4534381745297564, "grad_norm": 3.983722686767578, "learning_rate": 5.990905528744466e-06, "loss": 0.9745, "step": 5882 }, { "epoch": 0.45351526364477335, "grad_norm": 3.6609230041503906, "learning_rate": 5.98968181304837e-06, "loss": 0.9585, "step": 5883 }, { "epoch": 0.4535923527597903, "grad_norm": 3.4482264518737793, "learning_rate": 5.98845803565074e-06, "loss": 1.0082, "step": 5884 }, { "epoch": 0.4536694418748073, "grad_norm": 3.4838855266571045, "learning_rate": 5.987234196627869e-06, "loss": 0.8997, "step": 5885 }, { "epoch": 
0.45374653098982426, "grad_norm": 3.4517669677734375, "learning_rate": 5.986010296056059e-06, "loss": 1.0314, "step": 5886 }, { "epoch": 0.4538236201048412, "grad_norm": 3.517615556716919, "learning_rate": 5.984786334011617e-06, "loss": 0.9729, "step": 5887 }, { "epoch": 0.45390070921985815, "grad_norm": 3.7334585189819336, "learning_rate": 5.983562310570844e-06, "loss": 0.9424, "step": 5888 }, { "epoch": 0.4539777983348751, "grad_norm": 3.1004371643066406, "learning_rate": 5.982338225810056e-06, "loss": 0.8848, "step": 5889 }, { "epoch": 0.4540548874498921, "grad_norm": 3.9406261444091797, "learning_rate": 5.9811140798055674e-06, "loss": 1.0372, "step": 5890 }, { "epoch": 0.45413197656490906, "grad_norm": 3.5485575199127197, "learning_rate": 5.9798898726336965e-06, "loss": 0.8899, "step": 5891 }, { "epoch": 0.45420906567992597, "grad_norm": 3.7856481075286865, "learning_rate": 5.978665604370767e-06, "loss": 0.86, "step": 5892 }, { "epoch": 0.45428615479494294, "grad_norm": 3.8746819496154785, "learning_rate": 5.977441275093108e-06, "loss": 0.8669, "step": 5893 }, { "epoch": 0.4543632439099599, "grad_norm": 3.443347215652466, "learning_rate": 5.976216884877045e-06, "loss": 0.8895, "step": 5894 }, { "epoch": 0.4544403330249769, "grad_norm": 3.6835126876831055, "learning_rate": 5.974992433798916e-06, "loss": 0.9634, "step": 5895 }, { "epoch": 0.45451742213999385, "grad_norm": 4.053088665008545, "learning_rate": 5.973767921935059e-06, "loss": 0.933, "step": 5896 }, { "epoch": 0.45459451125501077, "grad_norm": 3.9018402099609375, "learning_rate": 5.972543349361813e-06, "loss": 0.8993, "step": 5897 }, { "epoch": 0.45467160037002774, "grad_norm": 3.5930070877075195, "learning_rate": 5.97131871615553e-06, "loss": 1.0334, "step": 5898 }, { "epoch": 0.4547486894850447, "grad_norm": 3.705622434616089, "learning_rate": 5.970094022392553e-06, "loss": 0.8647, "step": 5899 }, { "epoch": 0.4548257786000617, "grad_norm": 3.460371971130371, "learning_rate": 5.968869268149239e-06, "loss": 0.8912, "step": 5900 }, { "epoch": 0.45490286771507865, "grad_norm": 3.655982255935669, "learning_rate": 5.967644453501944e-06, "loss": 1.004, "step": 5901 }, { "epoch": 0.45497995683009557, "grad_norm": 3.730700969696045, "learning_rate": 5.966419578527027e-06, "loss": 0.9948, "step": 5902 }, { "epoch": 0.45505704594511254, "grad_norm": 3.825910806655884, "learning_rate": 5.965194643300858e-06, "loss": 1.0192, "step": 5903 }, { "epoch": 0.4551341350601295, "grad_norm": 3.2197327613830566, "learning_rate": 5.9639696478997985e-06, "loss": 0.8807, "step": 5904 }, { "epoch": 0.4552112241751465, "grad_norm": 3.4035542011260986, "learning_rate": 5.962744592400226e-06, "loss": 0.8079, "step": 5905 }, { "epoch": 0.45528831329016345, "grad_norm": 3.4863269329071045, "learning_rate": 5.961519476878513e-06, "loss": 0.8259, "step": 5906 }, { "epoch": 0.45536540240518036, "grad_norm": 3.9149959087371826, "learning_rate": 5.960294301411041e-06, "loss": 0.9398, "step": 5907 }, { "epoch": 0.45544249152019733, "grad_norm": 3.782867670059204, "learning_rate": 5.959069066074195e-06, "loss": 1.1127, "step": 5908 }, { "epoch": 0.4555195806352143, "grad_norm": 3.4562859535217285, "learning_rate": 5.957843770944357e-06, "loss": 0.9791, "step": 5909 }, { "epoch": 0.4555966697502313, "grad_norm": 3.538780689239502, "learning_rate": 5.956618416097921e-06, "loss": 0.965, "step": 5910 }, { "epoch": 0.45567375886524825, "grad_norm": 3.6963050365448, "learning_rate": 5.955393001611283e-06, "loss": 1.0603, "step": 5911 }, { "epoch": 
0.45575084798026516, "grad_norm": 3.4785306453704834, "learning_rate": 5.954167527560837e-06, "loss": 0.9417, "step": 5912 }, { "epoch": 0.45582793709528213, "grad_norm": 3.757713556289673, "learning_rate": 5.952941994022988e-06, "loss": 1.0384, "step": 5913 }, { "epoch": 0.4559050262102991, "grad_norm": 3.715573310852051, "learning_rate": 5.951716401074143e-06, "loss": 0.9363, "step": 5914 }, { "epoch": 0.4559821153253161, "grad_norm": 3.4896347522735596, "learning_rate": 5.9504907487907086e-06, "loss": 1.0017, "step": 5915 }, { "epoch": 0.45605920444033304, "grad_norm": 3.7236409187316895, "learning_rate": 5.949265037249096e-06, "loss": 1.0504, "step": 5916 }, { "epoch": 0.45613629355534996, "grad_norm": 3.842233657836914, "learning_rate": 5.948039266525728e-06, "loss": 1.0006, "step": 5917 }, { "epoch": 0.45621338267036693, "grad_norm": 3.868191719055176, "learning_rate": 5.946813436697021e-06, "loss": 1.0097, "step": 5918 }, { "epoch": 0.4562904717853839, "grad_norm": 3.6733784675598145, "learning_rate": 5.9455875478394e-06, "loss": 1.0119, "step": 5919 }, { "epoch": 0.45636756090040087, "grad_norm": 3.975710153579712, "learning_rate": 5.944361600029291e-06, "loss": 1.0206, "step": 5920 }, { "epoch": 0.45644465001541784, "grad_norm": 3.2998998165130615, "learning_rate": 5.9431355933431285e-06, "loss": 0.8406, "step": 5921 }, { "epoch": 0.45652173913043476, "grad_norm": 3.8829758167266846, "learning_rate": 5.941909527857348e-06, "loss": 0.9318, "step": 5922 }, { "epoch": 0.4565988282454517, "grad_norm": 3.46357798576355, "learning_rate": 5.940683403648384e-06, "loss": 0.927, "step": 5923 }, { "epoch": 0.4566759173604687, "grad_norm": 3.7031266689300537, "learning_rate": 5.939457220792684e-06, "loss": 0.9815, "step": 5924 }, { "epoch": 0.45675300647548567, "grad_norm": 3.6570425033569336, "learning_rate": 5.938230979366691e-06, "loss": 1.0229, "step": 5925 }, { "epoch": 0.45683009559050264, "grad_norm": 3.5698962211608887, "learning_rate": 5.937004679446854e-06, "loss": 0.9961, "step": 5926 }, { "epoch": 0.45690718470551955, "grad_norm": 3.748589515686035, "learning_rate": 5.935778321109631e-06, "loss": 0.9987, "step": 5927 }, { "epoch": 0.4569842738205365, "grad_norm": 3.5768892765045166, "learning_rate": 5.934551904431473e-06, "loss": 0.9889, "step": 5928 }, { "epoch": 0.4570613629355535, "grad_norm": 3.2622320652008057, "learning_rate": 5.933325429488847e-06, "loss": 0.9082, "step": 5929 }, { "epoch": 0.45713845205057047, "grad_norm": 3.7464730739593506, "learning_rate": 5.9320988963582125e-06, "loss": 1.0409, "step": 5930 }, { "epoch": 0.45721554116558744, "grad_norm": 3.6139042377471924, "learning_rate": 5.93087230511604e-06, "loss": 1.0702, "step": 5931 }, { "epoch": 0.4572926302806044, "grad_norm": 3.8561818599700928, "learning_rate": 5.929645655838801e-06, "loss": 1.1142, "step": 5932 }, { "epoch": 0.4573697193956213, "grad_norm": 3.627636194229126, "learning_rate": 5.9284189486029684e-06, "loss": 0.9967, "step": 5933 }, { "epoch": 0.4574468085106383, "grad_norm": 4.026650428771973, "learning_rate": 5.927192183485023e-06, "loss": 1.0336, "step": 5934 }, { "epoch": 0.45752389762565526, "grad_norm": 3.624974012374878, "learning_rate": 5.925965360561448e-06, "loss": 0.9791, "step": 5935 }, { "epoch": 0.45760098674067223, "grad_norm": 3.952138662338257, "learning_rate": 5.924738479908728e-06, "loss": 0.9589, "step": 5936 }, { "epoch": 0.4576780758556892, "grad_norm": 4.19709587097168, "learning_rate": 5.923511541603353e-06, "loss": 1.0059, "step": 5937 }, { "epoch": 
0.4577551649707061, "grad_norm": 4.1676530838012695, "learning_rate": 5.922284545721817e-06, "loss": 1.0851, "step": 5938 }, { "epoch": 0.4578322540857231, "grad_norm": 3.9117021560668945, "learning_rate": 5.921057492340614e-06, "loss": 1.0179, "step": 5939 }, { "epoch": 0.45790934320074006, "grad_norm": 3.553438186645508, "learning_rate": 5.919830381536249e-06, "loss": 0.9542, "step": 5940 }, { "epoch": 0.45798643231575703, "grad_norm": 3.3051836490631104, "learning_rate": 5.918603213385223e-06, "loss": 0.9783, "step": 5941 }, { "epoch": 0.458063521430774, "grad_norm": 4.240174770355225, "learning_rate": 5.917375987964044e-06, "loss": 1.0063, "step": 5942 }, { "epoch": 0.4581406105457909, "grad_norm": 3.5065135955810547, "learning_rate": 5.916148705349224e-06, "loss": 0.8109, "step": 5943 }, { "epoch": 0.4582176996608079, "grad_norm": 3.668158531188965, "learning_rate": 5.914921365617276e-06, "loss": 0.879, "step": 5944 }, { "epoch": 0.45829478877582486, "grad_norm": 3.529534339904785, "learning_rate": 5.9136939688447205e-06, "loss": 0.9102, "step": 5945 }, { "epoch": 0.45837187789084183, "grad_norm": 3.5600032806396484, "learning_rate": 5.912466515108078e-06, "loss": 0.9238, "step": 5946 }, { "epoch": 0.4584489670058588, "grad_norm": 3.8036692142486572, "learning_rate": 5.911239004483874e-06, "loss": 1.1042, "step": 5947 }, { "epoch": 0.4585260561208757, "grad_norm": 3.5569405555725098, "learning_rate": 5.9100114370486375e-06, "loss": 0.9662, "step": 5948 }, { "epoch": 0.4586031452358927, "grad_norm": 3.4714467525482178, "learning_rate": 5.9087838128789e-06, "loss": 0.9444, "step": 5949 }, { "epoch": 0.45868023435090965, "grad_norm": 3.8040552139282227, "learning_rate": 5.9075561320511994e-06, "loss": 0.9892, "step": 5950 }, { "epoch": 0.4587573234659266, "grad_norm": 3.3058183193206787, "learning_rate": 5.906328394642075e-06, "loss": 1.0018, "step": 5951 }, { "epoch": 0.4588344125809436, "grad_norm": 3.314758539199829, "learning_rate": 5.905100600728067e-06, "loss": 0.831, "step": 5952 }, { "epoch": 0.4589115016959605, "grad_norm": 3.5863702297210693, "learning_rate": 5.903872750385726e-06, "loss": 0.9611, "step": 5953 }, { "epoch": 0.4589885908109775, "grad_norm": 3.328200101852417, "learning_rate": 5.902644843691601e-06, "loss": 0.9919, "step": 5954 }, { "epoch": 0.45906567992599445, "grad_norm": 3.5870213508605957, "learning_rate": 5.901416880722242e-06, "loss": 0.9036, "step": 5955 }, { "epoch": 0.4591427690410114, "grad_norm": 3.978027582168579, "learning_rate": 5.900188861554213e-06, "loss": 0.9305, "step": 5956 }, { "epoch": 0.4592198581560284, "grad_norm": 3.5891008377075195, "learning_rate": 5.898960786264067e-06, "loss": 1.0179, "step": 5957 }, { "epoch": 0.4592969472710453, "grad_norm": 3.536594867706299, "learning_rate": 5.897732654928373e-06, "loss": 0.9776, "step": 5958 }, { "epoch": 0.4593740363860623, "grad_norm": 3.7737505435943604, "learning_rate": 5.896504467623698e-06, "loss": 1.121, "step": 5959 }, { "epoch": 0.45945112550107925, "grad_norm": 3.9404971599578857, "learning_rate": 5.895276224426613e-06, "loss": 1.0788, "step": 5960 }, { "epoch": 0.4595282146160962, "grad_norm": 3.931784152984619, "learning_rate": 5.894047925413691e-06, "loss": 0.988, "step": 5961 }, { "epoch": 0.4596053037311132, "grad_norm": 3.751889228820801, "learning_rate": 5.892819570661511e-06, "loss": 0.9438, "step": 5962 }, { "epoch": 0.4596823928461301, "grad_norm": 4.397334098815918, "learning_rate": 5.891591160246655e-06, "loss": 0.9381, "step": 5963 }, { "epoch": 0.4597594819611471, 
"grad_norm": 3.8391776084899902, "learning_rate": 5.890362694245709e-06, "loss": 0.8604, "step": 5964 }, { "epoch": 0.45983657107616405, "grad_norm": 3.5146422386169434, "learning_rate": 5.889134172735259e-06, "loss": 1.0192, "step": 5965 }, { "epoch": 0.459913660191181, "grad_norm": 3.9917097091674805, "learning_rate": 5.887905595791899e-06, "loss": 1.0062, "step": 5966 }, { "epoch": 0.459990749306198, "grad_norm": 3.921919345855713, "learning_rate": 5.886676963492224e-06, "loss": 0.9134, "step": 5967 }, { "epoch": 0.4600678384212149, "grad_norm": 3.3720543384552, "learning_rate": 5.885448275912832e-06, "loss": 0.8687, "step": 5968 }, { "epoch": 0.4601449275362319, "grad_norm": 3.6408445835113525, "learning_rate": 5.884219533130325e-06, "loss": 1.0733, "step": 5969 }, { "epoch": 0.46022201665124884, "grad_norm": 3.81526255607605, "learning_rate": 5.882990735221312e-06, "loss": 0.9489, "step": 5970 }, { "epoch": 0.4602991057662658, "grad_norm": 4.006250858306885, "learning_rate": 5.881761882262398e-06, "loss": 0.9353, "step": 5971 }, { "epoch": 0.4603761948812828, "grad_norm": 3.7868504524230957, "learning_rate": 5.880532974330197e-06, "loss": 1.0833, "step": 5972 }, { "epoch": 0.4604532839962997, "grad_norm": 3.490978956222534, "learning_rate": 5.879304011501327e-06, "loss": 1.0075, "step": 5973 }, { "epoch": 0.46053037311131667, "grad_norm": 3.4989683628082275, "learning_rate": 5.878074993852405e-06, "loss": 1.0013, "step": 5974 }, { "epoch": 0.46060746222633364, "grad_norm": 3.448817729949951, "learning_rate": 5.876845921460055e-06, "loss": 0.8911, "step": 5975 }, { "epoch": 0.4606845513413506, "grad_norm": 3.613193988800049, "learning_rate": 5.875616794400902e-06, "loss": 0.9418, "step": 5976 }, { "epoch": 0.4607616404563676, "grad_norm": 3.5848329067230225, "learning_rate": 5.874387612751579e-06, "loss": 1.0202, "step": 5977 }, { "epoch": 0.4608387295713845, "grad_norm": 3.828852415084839, "learning_rate": 5.8731583765887156e-06, "loss": 1.0841, "step": 5978 }, { "epoch": 0.46091581868640147, "grad_norm": 3.3265674114227295, "learning_rate": 5.87192908598895e-06, "loss": 0.8589, "step": 5979 }, { "epoch": 0.46099290780141844, "grad_norm": 3.5973331928253174, "learning_rate": 5.870699741028922e-06, "loss": 0.9637, "step": 5980 }, { "epoch": 0.4610699969164354, "grad_norm": 3.2978997230529785, "learning_rate": 5.869470341785274e-06, "loss": 0.9569, "step": 5981 }, { "epoch": 0.4611470860314524, "grad_norm": 3.7744216918945312, "learning_rate": 5.8682408883346535e-06, "loss": 0.9753, "step": 5982 }, { "epoch": 0.4612241751464693, "grad_norm": 3.4844672679901123, "learning_rate": 5.8670113807537095e-06, "loss": 0.9662, "step": 5983 }, { "epoch": 0.46130126426148627, "grad_norm": 3.5159308910369873, "learning_rate": 5.865781819119096e-06, "loss": 0.9549, "step": 5984 }, { "epoch": 0.46137835337650324, "grad_norm": 3.471386194229126, "learning_rate": 5.864552203507472e-06, "loss": 0.9481, "step": 5985 }, { "epoch": 0.4614554424915202, "grad_norm": 3.8364601135253906, "learning_rate": 5.863322533995495e-06, "loss": 0.9589, "step": 5986 }, { "epoch": 0.4615325316065372, "grad_norm": 3.745004415512085, "learning_rate": 5.862092810659829e-06, "loss": 0.9863, "step": 5987 }, { "epoch": 0.4616096207215541, "grad_norm": 3.7336912155151367, "learning_rate": 5.860863033577141e-06, "loss": 1.0487, "step": 5988 }, { "epoch": 0.46168670983657106, "grad_norm": 3.603816509246826, "learning_rate": 5.859633202824101e-06, "loss": 1.0228, "step": 5989 }, { "epoch": 0.46176379895158803, "grad_norm": 
3.479917049407959, "learning_rate": 5.858403318477384e-06, "loss": 0.9293, "step": 5990 }, { "epoch": 0.461840888066605, "grad_norm": 3.8286755084991455, "learning_rate": 5.857173380613665e-06, "loss": 1.0632, "step": 5991 }, { "epoch": 0.461917977181622, "grad_norm": 3.5856165885925293, "learning_rate": 5.855943389309626e-06, "loss": 1.0216, "step": 5992 }, { "epoch": 0.4619950662966389, "grad_norm": 4.016223907470703, "learning_rate": 5.8547133446419495e-06, "loss": 0.9528, "step": 5993 }, { "epoch": 0.46207215541165586, "grad_norm": 3.4276955127716064, "learning_rate": 5.853483246687323e-06, "loss": 1.0603, "step": 5994 }, { "epoch": 0.46214924452667283, "grad_norm": 3.522810459136963, "learning_rate": 5.852253095522435e-06, "loss": 0.9513, "step": 5995 }, { "epoch": 0.4622263336416898, "grad_norm": 3.607531785964966, "learning_rate": 5.851022891223982e-06, "loss": 0.9798, "step": 5996 }, { "epoch": 0.46230342275670677, "grad_norm": 3.3895387649536133, "learning_rate": 5.849792633868659e-06, "loss": 0.9795, "step": 5997 }, { "epoch": 0.4623805118717237, "grad_norm": 4.266413688659668, "learning_rate": 5.848562323533165e-06, "loss": 0.9955, "step": 5998 }, { "epoch": 0.46245760098674066, "grad_norm": 3.460974931716919, "learning_rate": 5.8473319602942065e-06, "loss": 0.9737, "step": 5999 }, { "epoch": 0.46253469010175763, "grad_norm": 3.6543521881103516, "learning_rate": 5.846101544228488e-06, "loss": 0.898, "step": 6000 }, { "epoch": 0.4626117792167746, "grad_norm": 3.48816180229187, "learning_rate": 5.84487107541272e-06, "loss": 0.9795, "step": 6001 }, { "epoch": 0.46268886833179157, "grad_norm": 3.694929838180542, "learning_rate": 5.843640553923618e-06, "loss": 0.9415, "step": 6002 }, { "epoch": 0.4627659574468085, "grad_norm": 3.641181230545044, "learning_rate": 5.842409979837894e-06, "loss": 0.9422, "step": 6003 }, { "epoch": 0.46284304656182546, "grad_norm": 5.558597087860107, "learning_rate": 5.841179353232273e-06, "loss": 0.9911, "step": 6004 }, { "epoch": 0.4629201356768424, "grad_norm": 3.562645196914673, "learning_rate": 5.839948674183476e-06, "loss": 1.0034, "step": 6005 }, { "epoch": 0.4629972247918594, "grad_norm": 3.8084611892700195, "learning_rate": 5.8387179427682265e-06, "loss": 1.0519, "step": 6006 }, { "epoch": 0.46307431390687637, "grad_norm": 3.4862160682678223, "learning_rate": 5.837487159063259e-06, "loss": 0.9177, "step": 6007 }, { "epoch": 0.4631514030218933, "grad_norm": 3.7302255630493164, "learning_rate": 5.836256323145304e-06, "loss": 0.9929, "step": 6008 }, { "epoch": 0.46322849213691025, "grad_norm": 4.211978912353516, "learning_rate": 5.835025435091099e-06, "loss": 1.0944, "step": 6009 }, { "epoch": 0.4633055812519272, "grad_norm": 3.287256956100464, "learning_rate": 5.8337944949773825e-06, "loss": 0.9778, "step": 6010 }, { "epoch": 0.4633826703669442, "grad_norm": 3.5103518962860107, "learning_rate": 5.832563502880898e-06, "loss": 0.9329, "step": 6011 }, { "epoch": 0.46345975948196116, "grad_norm": 3.8197262287139893, "learning_rate": 5.831332458878391e-06, "loss": 1.0127, "step": 6012 }, { "epoch": 0.4635368485969781, "grad_norm": 3.5835108757019043, "learning_rate": 5.830101363046611e-06, "loss": 1.0235, "step": 6013 }, { "epoch": 0.46361393771199505, "grad_norm": 3.5647315979003906, "learning_rate": 5.82887021546231e-06, "loss": 0.9782, "step": 6014 }, { "epoch": 0.463691026827012, "grad_norm": 3.430677652359009, "learning_rate": 5.827639016202244e-06, "loss": 1.0517, "step": 6015 }, { "epoch": 0.463768115942029, "grad_norm": 4.109676837921143, 
"learning_rate": 5.826407765343172e-06, "loss": 0.8909, "step": 6016 }, { "epoch": 0.46384520505704596, "grad_norm": 5.432858943939209, "learning_rate": 5.825176462961854e-06, "loss": 0.9727, "step": 6017 }, { "epoch": 0.46392229417206293, "grad_norm": 3.428158760070801, "learning_rate": 5.82394510913506e-06, "loss": 0.9286, "step": 6018 }, { "epoch": 0.46399938328707985, "grad_norm": 3.5705456733703613, "learning_rate": 5.822713703939554e-06, "loss": 0.9397, "step": 6019 }, { "epoch": 0.4640764724020968, "grad_norm": 3.492794990539551, "learning_rate": 5.82148224745211e-06, "loss": 0.943, "step": 6020 }, { "epoch": 0.4641535615171138, "grad_norm": 3.4019174575805664, "learning_rate": 5.820250739749502e-06, "loss": 0.851, "step": 6021 }, { "epoch": 0.46423065063213076, "grad_norm": 3.375807046890259, "learning_rate": 5.819019180908509e-06, "loss": 0.9877, "step": 6022 }, { "epoch": 0.46430773974714773, "grad_norm": 3.3332841396331787, "learning_rate": 5.817787571005913e-06, "loss": 0.9421, "step": 6023 }, { "epoch": 0.46438482886216464, "grad_norm": 3.4136815071105957, "learning_rate": 5.8165559101184955e-06, "loss": 1.0011, "step": 6024 }, { "epoch": 0.4644619179771816, "grad_norm": 3.7559056282043457, "learning_rate": 5.8153241983230464e-06, "loss": 1.0067, "step": 6025 }, { "epoch": 0.4645390070921986, "grad_norm": 3.7920331954956055, "learning_rate": 5.814092435696358e-06, "loss": 0.9631, "step": 6026 }, { "epoch": 0.46461609620721556, "grad_norm": 3.2758357524871826, "learning_rate": 5.81286062231522e-06, "loss": 0.857, "step": 6027 }, { "epoch": 0.4646931853222325, "grad_norm": 3.8748836517333984, "learning_rate": 5.811628758256433e-06, "loss": 1.0907, "step": 6028 }, { "epoch": 0.46477027443724944, "grad_norm": 3.435882568359375, "learning_rate": 5.8103968435967965e-06, "loss": 0.8907, "step": 6029 }, { "epoch": 0.4648473635522664, "grad_norm": 3.716470956802368, "learning_rate": 5.809164878413114e-06, "loss": 0.9661, "step": 6030 }, { "epoch": 0.4649244526672834, "grad_norm": 3.935591697692871, "learning_rate": 5.807932862782193e-06, "loss": 1.005, "step": 6031 }, { "epoch": 0.46500154178230035, "grad_norm": 3.4351768493652344, "learning_rate": 5.8067007967808405e-06, "loss": 0.9601, "step": 6032 }, { "epoch": 0.4650786308973173, "grad_norm": 3.390435218811035, "learning_rate": 5.805468680485874e-06, "loss": 0.9108, "step": 6033 }, { "epoch": 0.46515572001233424, "grad_norm": 3.5145466327667236, "learning_rate": 5.804236513974104e-06, "loss": 0.9561, "step": 6034 }, { "epoch": 0.4652328091273512, "grad_norm": 3.6625545024871826, "learning_rate": 5.8030042973223545e-06, "loss": 0.896, "step": 6035 }, { "epoch": 0.4653098982423682, "grad_norm": 3.7747929096221924, "learning_rate": 5.801772030607445e-06, "loss": 0.9278, "step": 6036 }, { "epoch": 0.46538698735738515, "grad_norm": 3.4798998832702637, "learning_rate": 5.800539713906203e-06, "loss": 1.0179, "step": 6037 }, { "epoch": 0.4654640764724021, "grad_norm": 3.7864067554473877, "learning_rate": 5.799307347295455e-06, "loss": 0.9142, "step": 6038 }, { "epoch": 0.46554116558741904, "grad_norm": 3.8589086532592773, "learning_rate": 5.798074930852035e-06, "loss": 0.8484, "step": 6039 }, { "epoch": 0.465618254702436, "grad_norm": 3.9709889888763428, "learning_rate": 5.796842464652774e-06, "loss": 1.1772, "step": 6040 }, { "epoch": 0.465695343817453, "grad_norm": 3.7802610397338867, "learning_rate": 5.7956099487745135e-06, "loss": 1.0716, "step": 6041 }, { "epoch": 0.46577243293246995, "grad_norm": 3.4964866638183594, "learning_rate": 
5.794377383294094e-06, "loss": 0.9444, "step": 6042 }, { "epoch": 0.4658495220474869, "grad_norm": 3.519301176071167, "learning_rate": 5.7931447682883565e-06, "loss": 1.0331, "step": 6043 }, { "epoch": 0.46592661116250383, "grad_norm": 3.5994677543640137, "learning_rate": 5.791912103834154e-06, "loss": 0.9118, "step": 6044 }, { "epoch": 0.4660037002775208, "grad_norm": 3.669741630554199, "learning_rate": 5.79067939000833e-06, "loss": 0.9314, "step": 6045 }, { "epoch": 0.4660807893925378, "grad_norm": 3.6019272804260254, "learning_rate": 5.7894466268877426e-06, "loss": 0.9013, "step": 6046 }, { "epoch": 0.46615787850755475, "grad_norm": 4.028010845184326, "learning_rate": 5.788213814549247e-06, "loss": 0.9965, "step": 6047 }, { "epoch": 0.4662349676225717, "grad_norm": 3.4716103076934814, "learning_rate": 5.786980953069702e-06, "loss": 0.8853, "step": 6048 }, { "epoch": 0.46631205673758863, "grad_norm": 3.9504270553588867, "learning_rate": 5.785748042525969e-06, "loss": 1.1174, "step": 6049 }, { "epoch": 0.4663891458526056, "grad_norm": 3.453526258468628, "learning_rate": 5.784515082994917e-06, "loss": 1.031, "step": 6050 }, { "epoch": 0.4664662349676226, "grad_norm": 3.568610668182373, "learning_rate": 5.783282074553412e-06, "loss": 0.9396, "step": 6051 }, { "epoch": 0.46654332408263954, "grad_norm": 3.7350780963897705, "learning_rate": 5.782049017278326e-06, "loss": 0.9443, "step": 6052 }, { "epoch": 0.4666204131976565, "grad_norm": 4.244739532470703, "learning_rate": 5.7808159112465344e-06, "loss": 1.0614, "step": 6053 }, { "epoch": 0.46669750231267343, "grad_norm": 3.688356637954712, "learning_rate": 5.779582756534914e-06, "loss": 0.8876, "step": 6054 }, { "epoch": 0.4667745914276904, "grad_norm": 3.4765431880950928, "learning_rate": 5.778349553220348e-06, "loss": 0.9501, "step": 6055 }, { "epoch": 0.46685168054270737, "grad_norm": 3.3060991764068604, "learning_rate": 5.777116301379717e-06, "loss": 0.9299, "step": 6056 }, { "epoch": 0.46692876965772434, "grad_norm": 3.5781679153442383, "learning_rate": 5.775883001089911e-06, "loss": 0.9658, "step": 6057 }, { "epoch": 0.4670058587727413, "grad_norm": 3.767371892929077, "learning_rate": 5.7746496524278176e-06, "loss": 1.0249, "step": 6058 }, { "epoch": 0.4670829478877582, "grad_norm": 3.511653184890747, "learning_rate": 5.7734162554703284e-06, "loss": 0.8842, "step": 6059 }, { "epoch": 0.4671600370027752, "grad_norm": 3.546081781387329, "learning_rate": 5.7721828102943445e-06, "loss": 1.0454, "step": 6060 }, { "epoch": 0.46723712611779217, "grad_norm": 3.7766621112823486, "learning_rate": 5.770949316976759e-06, "loss": 0.9635, "step": 6061 }, { "epoch": 0.46731421523280914, "grad_norm": 3.4135143756866455, "learning_rate": 5.7697157755944775e-06, "loss": 0.9405, "step": 6062 }, { "epoch": 0.4673913043478261, "grad_norm": 3.601234197616577, "learning_rate": 5.768482186224405e-06, "loss": 0.9234, "step": 6063 }, { "epoch": 0.467468393462843, "grad_norm": 3.9338316917419434, "learning_rate": 5.7672485489434456e-06, "loss": 1.032, "step": 6064 }, { "epoch": 0.46754548257786, "grad_norm": 4.0997633934021, "learning_rate": 5.766014863828515e-06, "loss": 0.956, "step": 6065 }, { "epoch": 0.46762257169287696, "grad_norm": 3.778200387954712, "learning_rate": 5.764781130956525e-06, "loss": 0.9533, "step": 6066 }, { "epoch": 0.46769966080789394, "grad_norm": 3.8161983489990234, "learning_rate": 5.763547350404391e-06, "loss": 0.9875, "step": 6067 }, { "epoch": 0.4677767499229109, "grad_norm": 3.7838432788848877, "learning_rate": 
5.762313522249036e-06, "loss": 0.9397, "step": 6068 }, { "epoch": 0.4678538390379278, "grad_norm": 3.383547067642212, "learning_rate": 5.761079646567379e-06, "loss": 0.95, "step": 6069 }, { "epoch": 0.4679309281529448, "grad_norm": 3.7536256313323975, "learning_rate": 5.7598457234363484e-06, "loss": 1.0254, "step": 6070 }, { "epoch": 0.46800801726796176, "grad_norm": 3.5894887447357178, "learning_rate": 5.7586117529328735e-06, "loss": 0.9251, "step": 6071 }, { "epoch": 0.46808510638297873, "grad_norm": 3.460188627243042, "learning_rate": 5.757377735133882e-06, "loss": 0.9488, "step": 6072 }, { "epoch": 0.4681621954979957, "grad_norm": 3.609666109085083, "learning_rate": 5.756143670116315e-06, "loss": 0.8807, "step": 6073 }, { "epoch": 0.4682392846130126, "grad_norm": 3.767493724822998, "learning_rate": 5.754909557957104e-06, "loss": 1.1737, "step": 6074 }, { "epoch": 0.4683163737280296, "grad_norm": 3.6794867515563965, "learning_rate": 5.753675398733192e-06, "loss": 1.0523, "step": 6075 }, { "epoch": 0.46839346284304656, "grad_norm": 3.692859649658203, "learning_rate": 5.752441192521523e-06, "loss": 0.9905, "step": 6076 }, { "epoch": 0.46847055195806353, "grad_norm": 3.4956281185150146, "learning_rate": 5.751206939399041e-06, "loss": 0.8996, "step": 6077 }, { "epoch": 0.4685476410730805, "grad_norm": 3.6580235958099365, "learning_rate": 5.749972639442698e-06, "loss": 1.0527, "step": 6078 }, { "epoch": 0.4686247301880974, "grad_norm": 3.3004517555236816, "learning_rate": 5.748738292729445e-06, "loss": 1.0016, "step": 6079 }, { "epoch": 0.4687018193031144, "grad_norm": 3.7859244346618652, "learning_rate": 5.747503899336238e-06, "loss": 1.0551, "step": 6080 }, { "epoch": 0.46877890841813136, "grad_norm": 3.9992268085479736, "learning_rate": 5.746269459340034e-06, "loss": 0.9909, "step": 6081 }, { "epoch": 0.4688559975331483, "grad_norm": 3.6973602771759033, "learning_rate": 5.7450349728177945e-06, "loss": 0.9538, "step": 6082 }, { "epoch": 0.4689330866481653, "grad_norm": 3.4329545497894287, "learning_rate": 5.743800439846482e-06, "loss": 0.9311, "step": 6083 }, { "epoch": 0.4690101757631822, "grad_norm": 3.846623182296753, "learning_rate": 5.742565860503066e-06, "loss": 0.9475, "step": 6084 }, { "epoch": 0.4690872648781992, "grad_norm": 4.217624664306641, "learning_rate": 5.741331234864513e-06, "loss": 1.204, "step": 6085 }, { "epoch": 0.46916435399321615, "grad_norm": 4.2322258949279785, "learning_rate": 5.740096563007797e-06, "loss": 0.9926, "step": 6086 }, { "epoch": 0.4692414431082331, "grad_norm": 4.023532867431641, "learning_rate": 5.738861845009894e-06, "loss": 1.0084, "step": 6087 }, { "epoch": 0.4693185322232501, "grad_norm": 3.6507327556610107, "learning_rate": 5.737627080947781e-06, "loss": 0.9628, "step": 6088 }, { "epoch": 0.469395621338267, "grad_norm": 3.5047266483306885, "learning_rate": 5.73639227089844e-06, "loss": 0.9771, "step": 6089 }, { "epoch": 0.469472710453284, "grad_norm": 3.76613450050354, "learning_rate": 5.735157414938855e-06, "loss": 0.981, "step": 6090 }, { "epoch": 0.46954979956830095, "grad_norm": 3.5989575386047363, "learning_rate": 5.733922513146013e-06, "loss": 0.8601, "step": 6091 }, { "epoch": 0.4696268886833179, "grad_norm": 4.787153720855713, "learning_rate": 5.732687565596904e-06, "loss": 1.1043, "step": 6092 }, { "epoch": 0.4697039777983349, "grad_norm": 3.478583812713623, "learning_rate": 5.731452572368517e-06, "loss": 0.9255, "step": 6093 }, { "epoch": 0.4697810669133518, "grad_norm": 3.7596375942230225, "learning_rate": 5.730217533537853e-06, 
"loss": 1.1279, "step": 6094 }, { "epoch": 0.4698581560283688, "grad_norm": 3.626962661743164, "learning_rate": 5.728982449181907e-06, "loss": 1.0483, "step": 6095 }, { "epoch": 0.46993524514338575, "grad_norm": 3.686464548110962, "learning_rate": 5.72774731937768e-06, "loss": 0.9567, "step": 6096 }, { "epoch": 0.4700123342584027, "grad_norm": 4.225037574768066, "learning_rate": 5.7265121442021784e-06, "loss": 1.0122, "step": 6097 }, { "epoch": 0.4700894233734197, "grad_norm": 3.9033050537109375, "learning_rate": 5.725276923732406e-06, "loss": 0.9657, "step": 6098 }, { "epoch": 0.4701665124884366, "grad_norm": 3.35575532913208, "learning_rate": 5.724041658045374e-06, "loss": 0.9243, "step": 6099 }, { "epoch": 0.4702436016034536, "grad_norm": 3.7150776386260986, "learning_rate": 5.722806347218095e-06, "loss": 0.98, "step": 6100 }, { "epoch": 0.47032069071847055, "grad_norm": 3.7382919788360596, "learning_rate": 5.721570991327583e-06, "loss": 1.1083, "step": 6101 }, { "epoch": 0.4703977798334875, "grad_norm": 3.8335604667663574, "learning_rate": 5.720335590450858e-06, "loss": 0.9726, "step": 6102 }, { "epoch": 0.4704748689485045, "grad_norm": 3.5674989223480225, "learning_rate": 5.719100144664938e-06, "loss": 1.0163, "step": 6103 }, { "epoch": 0.47055195806352146, "grad_norm": 3.2261412143707275, "learning_rate": 5.71786465404685e-06, "loss": 0.9454, "step": 6104 }, { "epoch": 0.4706290471785384, "grad_norm": 3.453087091445923, "learning_rate": 5.716629118673618e-06, "loss": 0.8817, "step": 6105 }, { "epoch": 0.47070613629355534, "grad_norm": 3.143789768218994, "learning_rate": 5.7153935386222715e-06, "loss": 0.9472, "step": 6106 }, { "epoch": 0.4707832254085723, "grad_norm": 3.5191619396209717, "learning_rate": 5.714157913969843e-06, "loss": 0.9291, "step": 6107 }, { "epoch": 0.4708603145235893, "grad_norm": 3.2179696559906006, "learning_rate": 5.712922244793369e-06, "loss": 0.9505, "step": 6108 }, { "epoch": 0.47093740363860626, "grad_norm": 3.557729959487915, "learning_rate": 5.711686531169883e-06, "loss": 0.999, "step": 6109 }, { "epoch": 0.47101449275362317, "grad_norm": 3.841547727584839, "learning_rate": 5.710450773176428e-06, "loss": 0.987, "step": 6110 }, { "epoch": 0.47109158186864014, "grad_norm": 3.502171039581299, "learning_rate": 5.709214970890049e-06, "loss": 0.9596, "step": 6111 }, { "epoch": 0.4711686709836571, "grad_norm": 3.86859393119812, "learning_rate": 5.707979124387788e-06, "loss": 0.9704, "step": 6112 }, { "epoch": 0.4712457600986741, "grad_norm": 3.8467605113983154, "learning_rate": 5.706743233746695e-06, "loss": 0.9401, "step": 6113 }, { "epoch": 0.47132284921369105, "grad_norm": 3.454573392868042, "learning_rate": 5.705507299043822e-06, "loss": 0.8615, "step": 6114 }, { "epoch": 0.47139993832870797, "grad_norm": 3.3528759479522705, "learning_rate": 5.704271320356223e-06, "loss": 0.8713, "step": 6115 }, { "epoch": 0.47147702744372494, "grad_norm": 3.8421576023101807, "learning_rate": 5.703035297760956e-06, "loss": 1.066, "step": 6116 }, { "epoch": 0.4715541165587419, "grad_norm": 3.7155203819274902, "learning_rate": 5.7017992313350765e-06, "loss": 1.0062, "step": 6117 }, { "epoch": 0.4716312056737589, "grad_norm": 3.5856966972351074, "learning_rate": 5.700563121155651e-06, "loss": 1.0004, "step": 6118 }, { "epoch": 0.47170829478877585, "grad_norm": 4.203090190887451, "learning_rate": 5.699326967299743e-06, "loss": 1.0252, "step": 6119 }, { "epoch": 0.47178538390379277, "grad_norm": 3.741010904312134, "learning_rate": 5.698090769844421e-06, "loss": 1.0402, "step": 
6120 }, { "epoch": 0.47186247301880974, "grad_norm": 3.547619104385376, "learning_rate": 5.696854528866755e-06, "loss": 0.8535, "step": 6121 }, { "epoch": 0.4719395621338267, "grad_norm": 3.611316442489624, "learning_rate": 5.695618244443818e-06, "loss": 1.0149, "step": 6122 }, { "epoch": 0.4720166512488437, "grad_norm": 3.562122344970703, "learning_rate": 5.694381916652686e-06, "loss": 0.9871, "step": 6123 }, { "epoch": 0.47209374036386065, "grad_norm": 3.538245916366577, "learning_rate": 5.693145545570439e-06, "loss": 0.8019, "step": 6124 }, { "epoch": 0.47217082947887756, "grad_norm": 3.3615951538085938, "learning_rate": 5.691909131274156e-06, "loss": 0.9203, "step": 6125 }, { "epoch": 0.47224791859389453, "grad_norm": 3.911485433578491, "learning_rate": 5.6906726738409215e-06, "loss": 0.9443, "step": 6126 }, { "epoch": 0.4723250077089115, "grad_norm": 3.790710926055908, "learning_rate": 5.689436173347825e-06, "loss": 0.9956, "step": 6127 }, { "epoch": 0.4724020968239285, "grad_norm": 3.447726249694824, "learning_rate": 5.688199629871952e-06, "loss": 0.9532, "step": 6128 }, { "epoch": 0.47247918593894545, "grad_norm": 3.5070300102233887, "learning_rate": 5.686963043490398e-06, "loss": 0.9041, "step": 6129 }, { "epoch": 0.47255627505396236, "grad_norm": 3.885089635848999, "learning_rate": 5.6857264142802535e-06, "loss": 1.0288, "step": 6130 }, { "epoch": 0.47263336416897933, "grad_norm": 3.5010008811950684, "learning_rate": 5.68448974231862e-06, "loss": 0.8738, "step": 6131 }, { "epoch": 0.4727104532839963, "grad_norm": 3.6111185550689697, "learning_rate": 5.683253027682597e-06, "loss": 1.0589, "step": 6132 }, { "epoch": 0.47278754239901327, "grad_norm": 3.557774305343628, "learning_rate": 5.682016270449286e-06, "loss": 1.004, "step": 6133 }, { "epoch": 0.47286463151403024, "grad_norm": 3.7765564918518066, "learning_rate": 5.680779470695791e-06, "loss": 0.9748, "step": 6134 }, { "epoch": 0.47294172062904716, "grad_norm": 3.426037549972534, "learning_rate": 5.679542628499224e-06, "loss": 0.8693, "step": 6135 }, { "epoch": 0.47301880974406413, "grad_norm": 3.553133487701416, "learning_rate": 5.678305743936692e-06, "loss": 0.9648, "step": 6136 }, { "epoch": 0.4730958988590811, "grad_norm": 3.720686197280884, "learning_rate": 5.677068817085311e-06, "loss": 1.0429, "step": 6137 }, { "epoch": 0.47317298797409807, "grad_norm": 3.6694304943084717, "learning_rate": 5.675831848022195e-06, "loss": 0.9811, "step": 6138 }, { "epoch": 0.47325007708911504, "grad_norm": 3.528355121612549, "learning_rate": 5.674594836824463e-06, "loss": 1.0162, "step": 6139 }, { "epoch": 0.47332716620413195, "grad_norm": 3.9201457500457764, "learning_rate": 5.673357783569238e-06, "loss": 1.0571, "step": 6140 }, { "epoch": 0.4734042553191489, "grad_norm": 3.4825947284698486, "learning_rate": 5.672120688333642e-06, "loss": 0.8942, "step": 6141 }, { "epoch": 0.4734813444341659, "grad_norm": 3.4814279079437256, "learning_rate": 5.6708835511948035e-06, "loss": 0.8944, "step": 6142 }, { "epoch": 0.47355843354918287, "grad_norm": 3.8149969577789307, "learning_rate": 5.669646372229849e-06, "loss": 0.9892, "step": 6143 }, { "epoch": 0.47363552266419984, "grad_norm": 3.37603497505188, "learning_rate": 5.6684091515159105e-06, "loss": 0.8202, "step": 6144 }, { "epoch": 0.47371261177921675, "grad_norm": 3.226973056793213, "learning_rate": 5.667171889130125e-06, "loss": 0.9294, "step": 6145 }, { "epoch": 0.4737897008942337, "grad_norm": 3.621938467025757, "learning_rate": 5.6659345851496265e-06, "loss": 1.0182, "step": 6146 }, { 
"epoch": 0.4738667900092507, "grad_norm": 3.5621347427368164, "learning_rate": 5.6646972396515555e-06, "loss": 0.9215, "step": 6147 }, { "epoch": 0.47394387912426766, "grad_norm": 4.244327545166016, "learning_rate": 5.663459852713055e-06, "loss": 1.0142, "step": 6148 }, { "epoch": 0.47402096823928463, "grad_norm": 3.6657721996307373, "learning_rate": 5.662222424411268e-06, "loss": 1.0159, "step": 6149 }, { "epoch": 0.47409805735430155, "grad_norm": 3.6783275604248047, "learning_rate": 5.660984954823342e-06, "loss": 1.0533, "step": 6150 }, { "epoch": 0.4741751464693185, "grad_norm": 3.747204542160034, "learning_rate": 5.659747444026429e-06, "loss": 1.04, "step": 6151 }, { "epoch": 0.4742522355843355, "grad_norm": 3.452821969985962, "learning_rate": 5.658509892097679e-06, "loss": 0.9084, "step": 6152 }, { "epoch": 0.47432932469935246, "grad_norm": 3.9301180839538574, "learning_rate": 5.657272299114248e-06, "loss": 0.8685, "step": 6153 }, { "epoch": 0.47440641381436943, "grad_norm": 3.441697120666504, "learning_rate": 5.656034665153294e-06, "loss": 1.0103, "step": 6154 }, { "epoch": 0.47448350292938635, "grad_norm": 3.8465073108673096, "learning_rate": 5.654796990291974e-06, "loss": 1.0009, "step": 6155 }, { "epoch": 0.4745605920444033, "grad_norm": 3.6904850006103516, "learning_rate": 5.6535592746074554e-06, "loss": 0.9484, "step": 6156 }, { "epoch": 0.4746376811594203, "grad_norm": 3.667325019836426, "learning_rate": 5.6523215181769e-06, "loss": 0.9456, "step": 6157 }, { "epoch": 0.47471477027443726, "grad_norm": 3.483203172683716, "learning_rate": 5.651083721077475e-06, "loss": 1.0455, "step": 6158 }, { "epoch": 0.47479185938945423, "grad_norm": 3.435410499572754, "learning_rate": 5.649845883386355e-06, "loss": 0.8986, "step": 6159 }, { "epoch": 0.47486894850447114, "grad_norm": 3.477846384048462, "learning_rate": 5.6486080051807066e-06, "loss": 0.8902, "step": 6160 }, { "epoch": 0.4749460376194881, "grad_norm": 3.9039952754974365, "learning_rate": 5.647370086537709e-06, "loss": 1.0054, "step": 6161 }, { "epoch": 0.4750231267345051, "grad_norm": 3.6027846336364746, "learning_rate": 5.646132127534541e-06, "loss": 1.1071, "step": 6162 }, { "epoch": 0.47510021584952206, "grad_norm": 4.0150370597839355, "learning_rate": 5.6448941282483795e-06, "loss": 0.9272, "step": 6163 }, { "epoch": 0.475177304964539, "grad_norm": 3.5615651607513428, "learning_rate": 5.64365608875641e-06, "loss": 0.9647, "step": 6164 }, { "epoch": 0.47525439407955594, "grad_norm": 3.8740906715393066, "learning_rate": 5.6424180091358175e-06, "loss": 1.0794, "step": 6165 }, { "epoch": 0.4753314831945729, "grad_norm": 3.5737662315368652, "learning_rate": 5.641179889463788e-06, "loss": 0.9521, "step": 6166 }, { "epoch": 0.4754085723095899, "grad_norm": 3.58293080329895, "learning_rate": 5.639941729817514e-06, "loss": 1.0373, "step": 6167 }, { "epoch": 0.47548566142460685, "grad_norm": 3.5029170513153076, "learning_rate": 5.638703530274187e-06, "loss": 1.0038, "step": 6168 }, { "epoch": 0.4755627505396238, "grad_norm": 3.324521064758301, "learning_rate": 5.637465290911004e-06, "loss": 0.9587, "step": 6169 }, { "epoch": 0.47563983965464074, "grad_norm": 3.4700326919555664, "learning_rate": 5.63622701180516e-06, "loss": 0.9135, "step": 6170 }, { "epoch": 0.4757169287696577, "grad_norm": 3.6384148597717285, "learning_rate": 5.634988693033857e-06, "loss": 0.868, "step": 6171 }, { "epoch": 0.4757940178846747, "grad_norm": 3.50246000289917, "learning_rate": 5.6337503346743e-06, "loss": 0.8695, "step": 6172 }, { "epoch": 
0.47587110699969165, "grad_norm": 3.835340738296509, "learning_rate": 5.632511936803689e-06, "loss": 1.0377, "step": 6173 }, { "epoch": 0.4759481961147086, "grad_norm": 3.505685567855835, "learning_rate": 5.631273499499236e-06, "loss": 0.902, "step": 6174 }, { "epoch": 0.47602528522972554, "grad_norm": 3.740123987197876, "learning_rate": 5.630035022838151e-06, "loss": 1.0631, "step": 6175 }, { "epoch": 0.4761023743447425, "grad_norm": 3.739340305328369, "learning_rate": 5.628796506897642e-06, "loss": 1.0683, "step": 6176 }, { "epoch": 0.4761794634597595, "grad_norm": 3.675077438354492, "learning_rate": 5.6275579517549306e-06, "loss": 1.016, "step": 6177 }, { "epoch": 0.47625655257477645, "grad_norm": 3.387910842895508, "learning_rate": 5.62631935748723e-06, "loss": 0.8774, "step": 6178 }, { "epoch": 0.4763336416897934, "grad_norm": 3.4367949962615967, "learning_rate": 5.625080724171761e-06, "loss": 0.9452, "step": 6179 }, { "epoch": 0.47641073080481033, "grad_norm": 3.6171581745147705, "learning_rate": 5.623842051885747e-06, "loss": 0.9478, "step": 6180 }, { "epoch": 0.4764878199198273, "grad_norm": 3.505159854888916, "learning_rate": 5.622603340706411e-06, "loss": 0.9781, "step": 6181 }, { "epoch": 0.4765649090348443, "grad_norm": 3.581728219985962, "learning_rate": 5.621364590710981e-06, "loss": 0.9409, "step": 6182 }, { "epoch": 0.47664199814986125, "grad_norm": 3.7894256114959717, "learning_rate": 5.620125801976687e-06, "loss": 0.9496, "step": 6183 }, { "epoch": 0.4767190872648782, "grad_norm": 3.7810251712799072, "learning_rate": 5.6188869745807614e-06, "loss": 1.0094, "step": 6184 }, { "epoch": 0.47679617637989513, "grad_norm": 3.6951205730438232, "learning_rate": 5.6176481086004395e-06, "loss": 0.8738, "step": 6185 }, { "epoch": 0.4768732654949121, "grad_norm": 3.578882932662964, "learning_rate": 5.6164092041129544e-06, "loss": 0.9924, "step": 6186 }, { "epoch": 0.4769503546099291, "grad_norm": 3.560987949371338, "learning_rate": 5.615170261195549e-06, "loss": 0.9863, "step": 6187 }, { "epoch": 0.47702744372494604, "grad_norm": 3.433151960372925, "learning_rate": 5.613931279925465e-06, "loss": 0.9885, "step": 6188 }, { "epoch": 0.477104532839963, "grad_norm": 3.652071237564087, "learning_rate": 5.612692260379945e-06, "loss": 0.9823, "step": 6189 }, { "epoch": 0.47718162195498, "grad_norm": 3.784919261932373, "learning_rate": 5.611453202636236e-06, "loss": 1.0343, "step": 6190 }, { "epoch": 0.4772587110699969, "grad_norm": 4.090549945831299, "learning_rate": 5.610214106771585e-06, "loss": 1.0221, "step": 6191 }, { "epoch": 0.47733580018501387, "grad_norm": 3.437002182006836, "learning_rate": 5.608974972863245e-06, "loss": 0.9871, "step": 6192 }, { "epoch": 0.47741288930003084, "grad_norm": 3.5076773166656494, "learning_rate": 5.6077358009884705e-06, "loss": 0.8728, "step": 6193 }, { "epoch": 0.4774899784150478, "grad_norm": 3.560811996459961, "learning_rate": 5.606496591224516e-06, "loss": 1.0416, "step": 6194 }, { "epoch": 0.4775670675300648, "grad_norm": 3.939362049102783, "learning_rate": 5.60525734364864e-06, "loss": 1.0182, "step": 6195 }, { "epoch": 0.4776441566450817, "grad_norm": 3.568834066390991, "learning_rate": 5.604018058338104e-06, "loss": 0.8795, "step": 6196 }, { "epoch": 0.47772124576009867, "grad_norm": 3.514498710632324, "learning_rate": 5.602778735370169e-06, "loss": 0.9279, "step": 6197 }, { "epoch": 0.47779833487511564, "grad_norm": 3.4076337814331055, "learning_rate": 5.601539374822103e-06, "loss": 0.9863, "step": 6198 }, { "epoch": 0.4778754239901326, 
"grad_norm": 3.399474859237671, "learning_rate": 5.600299976771172e-06, "loss": 0.9236, "step": 6199 }, { "epoch": 0.4779525131051496, "grad_norm": 3.3580496311187744, "learning_rate": 5.5990605412946466e-06, "loss": 0.9451, "step": 6200 }, { "epoch": 0.4780296022201665, "grad_norm": 3.756740093231201, "learning_rate": 5.597821068469799e-06, "loss": 1.0399, "step": 6201 }, { "epoch": 0.47810669133518346, "grad_norm": 3.5835258960723877, "learning_rate": 5.596581558373903e-06, "loss": 0.9042, "step": 6202 }, { "epoch": 0.47818378045020044, "grad_norm": 3.941892385482788, "learning_rate": 5.595342011084237e-06, "loss": 1.0083, "step": 6203 }, { "epoch": 0.4782608695652174, "grad_norm": 3.4614713191986084, "learning_rate": 5.594102426678082e-06, "loss": 0.9398, "step": 6204 }, { "epoch": 0.4783379586802344, "grad_norm": 3.545919179916382, "learning_rate": 5.592862805232714e-06, "loss": 1.0258, "step": 6205 }, { "epoch": 0.4784150477952513, "grad_norm": 3.4241621494293213, "learning_rate": 5.591623146825423e-06, "loss": 0.8465, "step": 6206 }, { "epoch": 0.47849213691026826, "grad_norm": 3.324366807937622, "learning_rate": 5.590383451533493e-06, "loss": 0.9298, "step": 6207 }, { "epoch": 0.47856922602528523, "grad_norm": 3.5040102005004883, "learning_rate": 5.589143719434211e-06, "loss": 0.9745, "step": 6208 }, { "epoch": 0.4786463151403022, "grad_norm": 3.782944440841675, "learning_rate": 5.587903950604872e-06, "loss": 1.0106, "step": 6209 }, { "epoch": 0.4787234042553192, "grad_norm": 3.437720537185669, "learning_rate": 5.586664145122764e-06, "loss": 0.9793, "step": 6210 }, { "epoch": 0.4788004933703361, "grad_norm": 4.048053741455078, "learning_rate": 5.585424303065186e-06, "loss": 1.0484, "step": 6211 }, { "epoch": 0.47887758248535306, "grad_norm": 3.8065176010131836, "learning_rate": 5.5841844245094345e-06, "loss": 0.9522, "step": 6212 }, { "epoch": 0.47895467160037003, "grad_norm": 3.6226398944854736, "learning_rate": 5.582944509532809e-06, "loss": 0.974, "step": 6213 }, { "epoch": 0.479031760715387, "grad_norm": 4.275754451751709, "learning_rate": 5.581704558212615e-06, "loss": 1.037, "step": 6214 }, { "epoch": 0.47910884983040397, "grad_norm": 3.503654718399048, "learning_rate": 5.5804645706261515e-06, "loss": 1.0253, "step": 6215 }, { "epoch": 0.4791859389454209, "grad_norm": 3.7088232040405273, "learning_rate": 5.57922454685073e-06, "loss": 1.0347, "step": 6216 }, { "epoch": 0.47926302806043786, "grad_norm": 3.6020233631134033, "learning_rate": 5.577984486963658e-06, "loss": 0.9362, "step": 6217 }, { "epoch": 0.4793401171754548, "grad_norm": 3.534729242324829, "learning_rate": 5.576744391042246e-06, "loss": 1.0147, "step": 6218 }, { "epoch": 0.4794172062904718, "grad_norm": 3.644331693649292, "learning_rate": 5.575504259163807e-06, "loss": 0.9651, "step": 6219 }, { "epoch": 0.47949429540548877, "grad_norm": 3.5781354904174805, "learning_rate": 5.5742640914056615e-06, "loss": 0.9953, "step": 6220 }, { "epoch": 0.4795713845205057, "grad_norm": 3.2821385860443115, "learning_rate": 5.573023887845122e-06, "loss": 0.9041, "step": 6221 }, { "epoch": 0.47964847363552265, "grad_norm": 3.6788458824157715, "learning_rate": 5.57178364855951e-06, "loss": 1.1181, "step": 6222 }, { "epoch": 0.4797255627505396, "grad_norm": 3.8085317611694336, "learning_rate": 5.57054337362615e-06, "loss": 0.8443, "step": 6223 }, { "epoch": 0.4798026518655566, "grad_norm": 3.922316789627075, "learning_rate": 5.569303063122364e-06, "loss": 1.0382, "step": 6224 }, { "epoch": 0.47987974098057357, "grad_norm": 
3.4889650344848633, "learning_rate": 5.568062717125483e-06, "loss": 0.8957, "step": 6225 }, { "epoch": 0.4799568300955905, "grad_norm": 3.5855491161346436, "learning_rate": 5.5668223357128325e-06, "loss": 0.9909, "step": 6226 }, { "epoch": 0.48003391921060745, "grad_norm": 3.6739110946655273, "learning_rate": 5.5655819189617445e-06, "loss": 0.8751, "step": 6227 }, { "epoch": 0.4801110083256244, "grad_norm": 3.6241581439971924, "learning_rate": 5.564341466949553e-06, "loss": 0.9153, "step": 6228 }, { "epoch": 0.4801880974406414, "grad_norm": 3.6307456493377686, "learning_rate": 5.5631009797535955e-06, "loss": 0.9424, "step": 6229 }, { "epoch": 0.48026518655565836, "grad_norm": 3.7684690952301025, "learning_rate": 5.561860457451207e-06, "loss": 0.9433, "step": 6230 }, { "epoch": 0.4803422756706753, "grad_norm": 3.5321202278137207, "learning_rate": 5.560619900119729e-06, "loss": 0.9128, "step": 6231 }, { "epoch": 0.48041936478569225, "grad_norm": 3.7544522285461426, "learning_rate": 5.5593793078365036e-06, "loss": 1.0287, "step": 6232 }, { "epoch": 0.4804964539007092, "grad_norm": 3.454862356185913, "learning_rate": 5.5581386806788765e-06, "loss": 0.9557, "step": 6233 }, { "epoch": 0.4805735430157262, "grad_norm": 3.669862747192383, "learning_rate": 5.5568980187241915e-06, "loss": 1.0802, "step": 6234 }, { "epoch": 0.48065063213074316, "grad_norm": 3.8794069290161133, "learning_rate": 5.5556573220498e-06, "loss": 0.9532, "step": 6235 }, { "epoch": 0.4807277212457601, "grad_norm": 3.671264171600342, "learning_rate": 5.554416590733054e-06, "loss": 0.9586, "step": 6236 }, { "epoch": 0.48080481036077705, "grad_norm": 3.6556334495544434, "learning_rate": 5.553175824851304e-06, "loss": 0.8997, "step": 6237 }, { "epoch": 0.480881899475794, "grad_norm": 3.8295559883117676, "learning_rate": 5.551935024481906e-06, "loss": 1.142, "step": 6238 }, { "epoch": 0.480958988590811, "grad_norm": 3.51387357711792, "learning_rate": 5.5506941897022175e-06, "loss": 0.9573, "step": 6239 }, { "epoch": 0.48103607770582796, "grad_norm": 3.6698529720306396, "learning_rate": 5.549453320589598e-06, "loss": 0.9651, "step": 6240 }, { "epoch": 0.4811131668208449, "grad_norm": 3.5171773433685303, "learning_rate": 5.54821241722141e-06, "loss": 0.8298, "step": 6241 }, { "epoch": 0.48119025593586184, "grad_norm": 4.172097682952881, "learning_rate": 5.5469714796750175e-06, "loss": 1.0308, "step": 6242 }, { "epoch": 0.4812673450508788, "grad_norm": 3.4253857135772705, "learning_rate": 5.5457305080277855e-06, "loss": 0.9464, "step": 6243 }, { "epoch": 0.4813444341658958, "grad_norm": 3.268526554107666, "learning_rate": 5.544489502357085e-06, "loss": 0.91, "step": 6244 }, { "epoch": 0.48142152328091276, "grad_norm": 3.801954507827759, "learning_rate": 5.543248462740281e-06, "loss": 0.9596, "step": 6245 }, { "epoch": 0.48149861239592967, "grad_norm": 3.7973077297210693, "learning_rate": 5.542007389254749e-06, "loss": 1.0331, "step": 6246 }, { "epoch": 0.48157570151094664, "grad_norm": 3.8994927406311035, "learning_rate": 5.540766281977865e-06, "loss": 0.9226, "step": 6247 }, { "epoch": 0.4816527906259636, "grad_norm": 3.246563196182251, "learning_rate": 5.539525140987003e-06, "loss": 0.8147, "step": 6248 }, { "epoch": 0.4817298797409806, "grad_norm": 3.5368645191192627, "learning_rate": 5.538283966359545e-06, "loss": 0.9997, "step": 6249 }, { "epoch": 0.48180696885599755, "grad_norm": 4.268583297729492, "learning_rate": 5.537042758172866e-06, "loss": 1.0206, "step": 6250 }, { "epoch": 0.48188405797101447, "grad_norm": 
3.504702568054199, "learning_rate": 5.535801516504354e-06, "loss": 0.8864, "step": 6251 }, { "epoch": 0.48196114708603144, "grad_norm": 3.5273540019989014, "learning_rate": 5.534560241431393e-06, "loss": 0.9426, "step": 6252 }, { "epoch": 0.4820382362010484, "grad_norm": 3.7020440101623535, "learning_rate": 5.533318933031368e-06, "loss": 1.0103, "step": 6253 }, { "epoch": 0.4821153253160654, "grad_norm": 3.3781721591949463, "learning_rate": 5.532077591381672e-06, "loss": 0.8451, "step": 6254 }, { "epoch": 0.48219241443108235, "grad_norm": 3.646610975265503, "learning_rate": 5.530836216559692e-06, "loss": 1.0605, "step": 6255 }, { "epoch": 0.48226950354609927, "grad_norm": 3.75663423538208, "learning_rate": 5.5295948086428245e-06, "loss": 1.0645, "step": 6256 }, { "epoch": 0.48234659266111624, "grad_norm": 3.898165702819824, "learning_rate": 5.528353367708462e-06, "loss": 1.0525, "step": 6257 }, { "epoch": 0.4824236817761332, "grad_norm": 3.5271053314208984, "learning_rate": 5.527111893834004e-06, "loss": 0.8598, "step": 6258 }, { "epoch": 0.4825007708911502, "grad_norm": 3.4338533878326416, "learning_rate": 5.52587038709685e-06, "loss": 0.9286, "step": 6259 }, { "epoch": 0.48257786000616715, "grad_norm": 4.084996223449707, "learning_rate": 5.5246288475744016e-06, "loss": 1.1074, "step": 6260 }, { "epoch": 0.48265494912118406, "grad_norm": 3.887216806411743, "learning_rate": 5.52338727534406e-06, "loss": 0.8895, "step": 6261 }, { "epoch": 0.48273203823620103, "grad_norm": 3.458646774291992, "learning_rate": 5.522145670483233e-06, "loss": 0.938, "step": 6262 }, { "epoch": 0.482809127351218, "grad_norm": 3.734957456588745, "learning_rate": 5.520904033069328e-06, "loss": 1.0008, "step": 6263 }, { "epoch": 0.482886216466235, "grad_norm": 3.3216323852539062, "learning_rate": 5.519662363179754e-06, "loss": 1.0191, "step": 6264 }, { "epoch": 0.48296330558125194, "grad_norm": 3.6311726570129395, "learning_rate": 5.518420660891924e-06, "loss": 0.9028, "step": 6265 }, { "epoch": 0.48304039469626886, "grad_norm": 3.6047608852386475, "learning_rate": 5.51717892628325e-06, "loss": 0.9282, "step": 6266 }, { "epoch": 0.48311748381128583, "grad_norm": 3.5420308113098145, "learning_rate": 5.515937159431147e-06, "loss": 0.8657, "step": 6267 }, { "epoch": 0.4831945729263028, "grad_norm": 3.5414748191833496, "learning_rate": 5.514695360413037e-06, "loss": 0.9715, "step": 6268 }, { "epoch": 0.48327166204131977, "grad_norm": 3.513932228088379, "learning_rate": 5.5134535293063355e-06, "loss": 0.8782, "step": 6269 }, { "epoch": 0.48334875115633674, "grad_norm": 3.6613216400146484, "learning_rate": 5.512211666188465e-06, "loss": 1.0077, "step": 6270 }, { "epoch": 0.48342584027135366, "grad_norm": 4.074599742889404, "learning_rate": 5.510969771136852e-06, "loss": 0.9404, "step": 6271 }, { "epoch": 0.48350292938637063, "grad_norm": 3.5803050994873047, "learning_rate": 5.509727844228917e-06, "loss": 0.9411, "step": 6272 }, { "epoch": 0.4835800185013876, "grad_norm": 3.4778783321380615, "learning_rate": 5.5084858855420945e-06, "loss": 0.917, "step": 6273 }, { "epoch": 0.48365710761640457, "grad_norm": 3.6360175609588623, "learning_rate": 5.507243895153808e-06, "loss": 1.0272, "step": 6274 }, { "epoch": 0.48373419673142154, "grad_norm": 3.4350030422210693, "learning_rate": 5.506001873141493e-06, "loss": 0.8781, "step": 6275 }, { "epoch": 0.4838112858464385, "grad_norm": 3.641852378845215, "learning_rate": 5.504759819582581e-06, "loss": 0.9962, "step": 6276 }, { "epoch": 0.4838883749614554, "grad_norm": 
3.9919748306274414, "learning_rate": 5.5035177345545085e-06, "loss": 0.9799, "step": 6277 }, { "epoch": 0.4839654640764724, "grad_norm": 3.5844922065734863, "learning_rate": 5.502275618134715e-06, "loss": 0.9189, "step": 6278 }, { "epoch": 0.48404255319148937, "grad_norm": 3.5310840606689453, "learning_rate": 5.5010334704006364e-06, "loss": 0.9687, "step": 6279 }, { "epoch": 0.48411964230650634, "grad_norm": 4.053980350494385, "learning_rate": 5.499791291429714e-06, "loss": 0.8998, "step": 6280 }, { "epoch": 0.4841967314215233, "grad_norm": 3.388463020324707, "learning_rate": 5.498549081299397e-06, "loss": 0.9235, "step": 6281 }, { "epoch": 0.4842738205365402, "grad_norm": 3.387619972229004, "learning_rate": 5.497306840087124e-06, "loss": 0.9145, "step": 6282 }, { "epoch": 0.4843509096515572, "grad_norm": 3.7833995819091797, "learning_rate": 5.496064567870346e-06, "loss": 0.9596, "step": 6283 }, { "epoch": 0.48442799876657416, "grad_norm": 3.767986536026001, "learning_rate": 5.494822264726512e-06, "loss": 1.0506, "step": 6284 }, { "epoch": 0.48450508788159113, "grad_norm": 3.8580732345581055, "learning_rate": 5.4935799307330715e-06, "loss": 1.1381, "step": 6285 }, { "epoch": 0.4845821769966081, "grad_norm": 3.7080979347229004, "learning_rate": 5.492337565967479e-06, "loss": 0.9691, "step": 6286 }, { "epoch": 0.484659266111625, "grad_norm": 3.4065332412719727, "learning_rate": 5.491095170507189e-06, "loss": 0.908, "step": 6287 }, { "epoch": 0.484736355226642, "grad_norm": 3.787928819656372, "learning_rate": 5.4898527444296586e-06, "loss": 0.8999, "step": 6288 }, { "epoch": 0.48481344434165896, "grad_norm": 3.6937663555145264, "learning_rate": 5.488610287812348e-06, "loss": 0.976, "step": 6289 }, { "epoch": 0.48489053345667593, "grad_norm": 3.6718273162841797, "learning_rate": 5.487367800732715e-06, "loss": 0.9509, "step": 6290 }, { "epoch": 0.4849676225716929, "grad_norm": 3.9421653747558594, "learning_rate": 5.486125283268223e-06, "loss": 0.9137, "step": 6291 }, { "epoch": 0.4850447116867098, "grad_norm": 3.546727418899536, "learning_rate": 5.48488273549634e-06, "loss": 0.9182, "step": 6292 }, { "epoch": 0.4851218008017268, "grad_norm": 3.6610233783721924, "learning_rate": 5.483640157494528e-06, "loss": 0.9811, "step": 6293 }, { "epoch": 0.48519888991674376, "grad_norm": 3.579312801361084, "learning_rate": 5.482397549340256e-06, "loss": 0.879, "step": 6294 }, { "epoch": 0.48527597903176073, "grad_norm": 3.8670926094055176, "learning_rate": 5.481154911110995e-06, "loss": 1.0397, "step": 6295 }, { "epoch": 0.4853530681467777, "grad_norm": 3.6566736698150635, "learning_rate": 5.4799122428842185e-06, "loss": 1.0115, "step": 6296 }, { "epoch": 0.4854301572617946, "grad_norm": 3.503852605819702, "learning_rate": 5.478669544737401e-06, "loss": 1.0862, "step": 6297 }, { "epoch": 0.4855072463768116, "grad_norm": 3.8774027824401855, "learning_rate": 5.477426816748014e-06, "loss": 1.0201, "step": 6298 }, { "epoch": 0.48558433549182856, "grad_norm": 3.327522039413452, "learning_rate": 5.476184058993539e-06, "loss": 0.9576, "step": 6299 }, { "epoch": 0.4856614246068455, "grad_norm": 3.4257946014404297, "learning_rate": 5.4749412715514525e-06, "loss": 0.9476, "step": 6300 }, { "epoch": 0.4857385137218625, "grad_norm": 4.544504165649414, "learning_rate": 5.473698454499239e-06, "loss": 1.0781, "step": 6301 }, { "epoch": 0.4858156028368794, "grad_norm": 3.3634119033813477, "learning_rate": 5.47245560791438e-06, "loss": 0.936, "step": 6302 }, { "epoch": 0.4858926919518964, "grad_norm": 3.5162649154663086, 
"learning_rate": 5.47121273187436e-06, "loss": 1.0161, "step": 6303 }, { "epoch": 0.48596978106691335, "grad_norm": 3.8736398220062256, "learning_rate": 5.4699698264566665e-06, "loss": 0.9404, "step": 6304 }, { "epoch": 0.4860468701819303, "grad_norm": 3.300621509552002, "learning_rate": 5.468726891738789e-06, "loss": 0.9441, "step": 6305 }, { "epoch": 0.4861239592969473, "grad_norm": 3.4393205642700195, "learning_rate": 5.467483927798217e-06, "loss": 0.9706, "step": 6306 }, { "epoch": 0.4862010484119642, "grad_norm": 3.7706298828125, "learning_rate": 5.4662409347124436e-06, "loss": 0.9214, "step": 6307 }, { "epoch": 0.4862781375269812, "grad_norm": 3.6923952102661133, "learning_rate": 5.464997912558963e-06, "loss": 0.8973, "step": 6308 }, { "epoch": 0.48635522664199815, "grad_norm": 3.481060743331909, "learning_rate": 5.46375486141527e-06, "loss": 0.867, "step": 6309 }, { "epoch": 0.4864323157570151, "grad_norm": 3.6562881469726562, "learning_rate": 5.462511781358866e-06, "loss": 0.9865, "step": 6310 }, { "epoch": 0.4865094048720321, "grad_norm": 3.6033737659454346, "learning_rate": 5.461268672467245e-06, "loss": 1.0277, "step": 6311 }, { "epoch": 0.486586493987049, "grad_norm": 3.8710803985595703, "learning_rate": 5.460025534817911e-06, "loss": 0.985, "step": 6312 }, { "epoch": 0.486663583102066, "grad_norm": 3.498439073562622, "learning_rate": 5.458782368488369e-06, "loss": 1.0291, "step": 6313 }, { "epoch": 0.48674067221708295, "grad_norm": 3.638658046722412, "learning_rate": 5.4575391735561216e-06, "loss": 0.9331, "step": 6314 }, { "epoch": 0.4868177613320999, "grad_norm": 3.5828909873962402, "learning_rate": 5.456295950098676e-06, "loss": 1.0794, "step": 6315 }, { "epoch": 0.4868948504471169, "grad_norm": 3.282355308532715, "learning_rate": 5.45505269819354e-06, "loss": 0.9155, "step": 6316 }, { "epoch": 0.4869719395621338, "grad_norm": 3.461933135986328, "learning_rate": 5.453809417918227e-06, "loss": 0.9608, "step": 6317 }, { "epoch": 0.4870490286771508, "grad_norm": 3.6630191802978516, "learning_rate": 5.452566109350248e-06, "loss": 1.0117, "step": 6318 }, { "epoch": 0.48712611779216775, "grad_norm": 3.436558485031128, "learning_rate": 5.451322772567114e-06, "loss": 0.8844, "step": 6319 }, { "epoch": 0.4872032069071847, "grad_norm": 3.7390990257263184, "learning_rate": 5.450079407646343e-06, "loss": 1.0276, "step": 6320 }, { "epoch": 0.4872802960222017, "grad_norm": 3.4452319145202637, "learning_rate": 5.448836014665453e-06, "loss": 0.8985, "step": 6321 }, { "epoch": 0.4873573851372186, "grad_norm": 3.383016347885132, "learning_rate": 5.447592593701961e-06, "loss": 0.969, "step": 6322 }, { "epoch": 0.48743447425223557, "grad_norm": 3.7448880672454834, "learning_rate": 5.446349144833389e-06, "loss": 1.0328, "step": 6323 }, { "epoch": 0.48751156336725254, "grad_norm": 3.59957218170166, "learning_rate": 5.44510566813726e-06, "loss": 1.0494, "step": 6324 }, { "epoch": 0.4875886524822695, "grad_norm": 3.316612720489502, "learning_rate": 5.443862163691097e-06, "loss": 1.0422, "step": 6325 }, { "epoch": 0.4876657415972865, "grad_norm": 3.4998679161071777, "learning_rate": 5.442618631572428e-06, "loss": 0.9016, "step": 6326 }, { "epoch": 0.4877428307123034, "grad_norm": 3.8718786239624023, "learning_rate": 5.44137507185878e-06, "loss": 0.9931, "step": 6327 }, { "epoch": 0.48781991982732037, "grad_norm": 3.7852871417999268, "learning_rate": 5.440131484627681e-06, "loss": 0.9991, "step": 6328 }, { "epoch": 0.48789700894233734, "grad_norm": 3.4875009059906006, "learning_rate": 
5.438887869956664e-06, "loss": 0.9499, "step": 6329 }, { "epoch": 0.4879740980573543, "grad_norm": 3.2732343673706055, "learning_rate": 5.437644227923261e-06, "loss": 1.0069, "step": 6330 }, { "epoch": 0.4880511871723713, "grad_norm": 3.7164502143859863, "learning_rate": 5.4364005586050075e-06, "loss": 1.0977, "step": 6331 }, { "epoch": 0.4881282762873882, "grad_norm": 3.7878737449645996, "learning_rate": 5.4351568620794395e-06, "loss": 1.0551, "step": 6332 }, { "epoch": 0.48820536540240517, "grad_norm": 3.5380685329437256, "learning_rate": 5.433913138424094e-06, "loss": 0.8823, "step": 6333 }, { "epoch": 0.48828245451742214, "grad_norm": 3.8582963943481445, "learning_rate": 5.4326693877165125e-06, "loss": 0.9363, "step": 6334 }, { "epoch": 0.4883595436324391, "grad_norm": 3.4837450981140137, "learning_rate": 5.431425610034235e-06, "loss": 0.8931, "step": 6335 }, { "epoch": 0.4884366327474561, "grad_norm": 4.541477680206299, "learning_rate": 5.430181805454805e-06, "loss": 0.9786, "step": 6336 }, { "epoch": 0.488513721862473, "grad_norm": 3.627601146697998, "learning_rate": 5.428937974055769e-06, "loss": 0.9365, "step": 6337 }, { "epoch": 0.48859081097748996, "grad_norm": 3.725983142852783, "learning_rate": 5.427694115914669e-06, "loss": 0.9854, "step": 6338 }, { "epoch": 0.48866790009250693, "grad_norm": 3.6275992393493652, "learning_rate": 5.426450231109058e-06, "loss": 0.9704, "step": 6339 }, { "epoch": 0.4887449892075239, "grad_norm": 3.6166484355926514, "learning_rate": 5.425206319716483e-06, "loss": 1.0062, "step": 6340 }, { "epoch": 0.4888220783225409, "grad_norm": 3.833855152130127, "learning_rate": 5.423962381814496e-06, "loss": 0.983, "step": 6341 }, { "epoch": 0.4888991674375578, "grad_norm": 3.7868459224700928, "learning_rate": 5.422718417480651e-06, "loss": 0.9064, "step": 6342 }, { "epoch": 0.48897625655257476, "grad_norm": 3.3974921703338623, "learning_rate": 5.421474426792501e-06, "loss": 0.965, "step": 6343 }, { "epoch": 0.48905334566759173, "grad_norm": 3.812864065170288, "learning_rate": 5.420230409827604e-06, "loss": 1.0489, "step": 6344 }, { "epoch": 0.4891304347826087, "grad_norm": 3.837310552597046, "learning_rate": 5.418986366663518e-06, "loss": 0.9851, "step": 6345 }, { "epoch": 0.4892075238976257, "grad_norm": 3.419736385345459, "learning_rate": 5.4177422973778015e-06, "loss": 0.9357, "step": 6346 }, { "epoch": 0.4892846130126426, "grad_norm": 3.845294237136841, "learning_rate": 5.416498202048016e-06, "loss": 1.0626, "step": 6347 }, { "epoch": 0.48936170212765956, "grad_norm": 3.5029916763305664, "learning_rate": 5.415254080751725e-06, "loss": 0.9745, "step": 6348 }, { "epoch": 0.48943879124267653, "grad_norm": 3.959080696105957, "learning_rate": 5.414009933566492e-06, "loss": 1.0339, "step": 6349 }, { "epoch": 0.4895158803576935, "grad_norm": 3.6839077472686768, "learning_rate": 5.412765760569886e-06, "loss": 0.9517, "step": 6350 }, { "epoch": 0.48959296947271047, "grad_norm": 3.2615315914154053, "learning_rate": 5.41152156183947e-06, "loss": 0.8866, "step": 6351 }, { "epoch": 0.4896700585877274, "grad_norm": 3.5984644889831543, "learning_rate": 5.410277337452817e-06, "loss": 0.9906, "step": 6352 }, { "epoch": 0.48974714770274436, "grad_norm": 3.3124704360961914, "learning_rate": 5.409033087487498e-06, "loss": 0.8656, "step": 6353 }, { "epoch": 0.4898242368177613, "grad_norm": 3.2847061157226562, "learning_rate": 5.407788812021082e-06, "loss": 0.8378, "step": 6354 }, { "epoch": 0.4899013259327783, "grad_norm": 3.559105157852173, "learning_rate": 
5.406544511131146e-06, "loss": 1.0279, "step": 6355 }, { "epoch": 0.48997841504779527, "grad_norm": 3.41558575630188, "learning_rate": 5.405300184895268e-06, "loss": 0.9121, "step": 6356 }, { "epoch": 0.4900555041628122, "grad_norm": 3.4175400733947754, "learning_rate": 5.40405583339102e-06, "loss": 0.8206, "step": 6357 }, { "epoch": 0.49013259327782915, "grad_norm": 3.7878329753875732, "learning_rate": 5.402811456695985e-06, "loss": 0.9255, "step": 6358 }, { "epoch": 0.4902096823928461, "grad_norm": 3.816235303878784, "learning_rate": 5.401567054887741e-06, "loss": 1.1515, "step": 6359 }, { "epoch": 0.4902867715078631, "grad_norm": 3.481640100479126, "learning_rate": 5.400322628043869e-06, "loss": 0.8833, "step": 6360 }, { "epoch": 0.49036386062288007, "grad_norm": 3.5390591621398926, "learning_rate": 5.399078176241958e-06, "loss": 0.9331, "step": 6361 }, { "epoch": 0.49044094973789704, "grad_norm": 3.5117104053497314, "learning_rate": 5.397833699559587e-06, "loss": 0.994, "step": 6362 }, { "epoch": 0.49051803885291395, "grad_norm": 3.922240972518921, "learning_rate": 5.396589198074347e-06, "loss": 1.063, "step": 6363 }, { "epoch": 0.4905951279679309, "grad_norm": 3.7044625282287598, "learning_rate": 5.3953446718638235e-06, "loss": 1.0509, "step": 6364 }, { "epoch": 0.4906722170829479, "grad_norm": 3.929018259048462, "learning_rate": 5.394100121005608e-06, "loss": 1.0066, "step": 6365 }, { "epoch": 0.49074930619796486, "grad_norm": 3.511359214782715, "learning_rate": 5.39285554557729e-06, "loss": 0.959, "step": 6366 }, { "epoch": 0.49082639531298183, "grad_norm": 3.469261646270752, "learning_rate": 5.391610945656464e-06, "loss": 1.0333, "step": 6367 }, { "epoch": 0.49090348442799875, "grad_norm": 3.302649736404419, "learning_rate": 5.390366321320723e-06, "loss": 0.9105, "step": 6368 }, { "epoch": 0.4909805735430157, "grad_norm": 3.9019734859466553, "learning_rate": 5.389121672647666e-06, "loss": 1.1229, "step": 6369 }, { "epoch": 0.4910576626580327, "grad_norm": 3.741459846496582, "learning_rate": 5.387876999714885e-06, "loss": 1.0295, "step": 6370 }, { "epoch": 0.49113475177304966, "grad_norm": 3.3575656414031982, "learning_rate": 5.386632302599985e-06, "loss": 0.9632, "step": 6371 }, { "epoch": 0.49121184088806663, "grad_norm": 3.4289495944976807, "learning_rate": 5.385387581380561e-06, "loss": 0.9537, "step": 6372 }, { "epoch": 0.49128893000308355, "grad_norm": 3.718967914581299, "learning_rate": 5.384142836134217e-06, "loss": 0.953, "step": 6373 }, { "epoch": 0.4913660191181005, "grad_norm": 3.8961894512176514, "learning_rate": 5.382898066938559e-06, "loss": 0.9896, "step": 6374 }, { "epoch": 0.4914431082331175, "grad_norm": 3.611521005630493, "learning_rate": 5.381653273871188e-06, "loss": 1.0437, "step": 6375 }, { "epoch": 0.49152019734813446, "grad_norm": 3.494535446166992, "learning_rate": 5.380408457009711e-06, "loss": 0.9011, "step": 6376 }, { "epoch": 0.49159728646315143, "grad_norm": 3.385871648788452, "learning_rate": 5.37916361643174e-06, "loss": 0.9788, "step": 6377 }, { "epoch": 0.49167437557816834, "grad_norm": 3.541168212890625, "learning_rate": 5.377918752214878e-06, "loss": 0.9361, "step": 6378 }, { "epoch": 0.4917514646931853, "grad_norm": 3.743074417114258, "learning_rate": 5.37667386443674e-06, "loss": 0.9046, "step": 6379 }, { "epoch": 0.4918285538082023, "grad_norm": 3.602600574493408, "learning_rate": 5.375428953174939e-06, "loss": 1.0171, "step": 6380 }, { "epoch": 0.49190564292321926, "grad_norm": 3.6449949741363525, "learning_rate": 5.374184018507086e-06, 
"loss": 1.0, "step": 6381 }, { "epoch": 0.4919827320382362, "grad_norm": 3.660562515258789, "learning_rate": 5.372939060510797e-06, "loss": 1.0855, "step": 6382 }, { "epoch": 0.49205982115325314, "grad_norm": 3.2304728031158447, "learning_rate": 5.371694079263688e-06, "loss": 0.8088, "step": 6383 }, { "epoch": 0.4921369102682701, "grad_norm": 3.455740451812744, "learning_rate": 5.37044907484338e-06, "loss": 0.9095, "step": 6384 }, { "epoch": 0.4922139993832871, "grad_norm": 3.787229061126709, "learning_rate": 5.369204047327491e-06, "loss": 0.9225, "step": 6385 }, { "epoch": 0.49229108849830405, "grad_norm": 3.7648141384124756, "learning_rate": 5.367958996793641e-06, "loss": 0.9692, "step": 6386 }, { "epoch": 0.492368177613321, "grad_norm": 3.6073803901672363, "learning_rate": 5.366713923319455e-06, "loss": 0.9865, "step": 6387 }, { "epoch": 0.49244526672833794, "grad_norm": 3.283367872238159, "learning_rate": 5.365468826982553e-06, "loss": 0.991, "step": 6388 }, { "epoch": 0.4925223558433549, "grad_norm": 3.9354822635650635, "learning_rate": 5.364223707860563e-06, "loss": 0.9923, "step": 6389 }, { "epoch": 0.4925994449583719, "grad_norm": 3.2957966327667236, "learning_rate": 5.362978566031112e-06, "loss": 0.9225, "step": 6390 }, { "epoch": 0.49267653407338885, "grad_norm": 3.9109017848968506, "learning_rate": 5.361733401571826e-06, "loss": 0.8456, "step": 6391 }, { "epoch": 0.4927536231884058, "grad_norm": 3.773970603942871, "learning_rate": 5.360488214560336e-06, "loss": 0.9238, "step": 6392 }, { "epoch": 0.49283071230342274, "grad_norm": 3.3572847843170166, "learning_rate": 5.359243005074274e-06, "loss": 0.9182, "step": 6393 }, { "epoch": 0.4929078014184397, "grad_norm": 3.6737236976623535, "learning_rate": 5.35799777319127e-06, "loss": 0.994, "step": 6394 }, { "epoch": 0.4929848905334567, "grad_norm": 4.093838214874268, "learning_rate": 5.356752518988961e-06, "loss": 1.013, "step": 6395 }, { "epoch": 0.49306197964847365, "grad_norm": 3.745969295501709, "learning_rate": 5.355507242544978e-06, "loss": 0.9943, "step": 6396 }, { "epoch": 0.4931390687634906, "grad_norm": 3.748204469680786, "learning_rate": 5.35426194393696e-06, "loss": 0.8421, "step": 6397 }, { "epoch": 0.49321615787850753, "grad_norm": 3.6849241256713867, "learning_rate": 5.3530166232425454e-06, "loss": 0.8745, "step": 6398 }, { "epoch": 0.4932932469935245, "grad_norm": 3.651228189468384, "learning_rate": 5.351771280539372e-06, "loss": 1.0544, "step": 6399 }, { "epoch": 0.4933703361085415, "grad_norm": 4.058038234710693, "learning_rate": 5.35052591590508e-06, "loss": 1.0527, "step": 6400 }, { "epoch": 0.49344742522355844, "grad_norm": 4.054307460784912, "learning_rate": 5.349280529417316e-06, "loss": 0.9891, "step": 6401 }, { "epoch": 0.4935245143385754, "grad_norm": 3.8959100246429443, "learning_rate": 5.348035121153716e-06, "loss": 1.0433, "step": 6402 }, { "epoch": 0.49360160345359233, "grad_norm": 3.7364206314086914, "learning_rate": 5.346789691191931e-06, "loss": 0.9561, "step": 6403 }, { "epoch": 0.4936786925686093, "grad_norm": 4.071012496948242, "learning_rate": 5.3455442396096045e-06, "loss": 0.9654, "step": 6404 }, { "epoch": 0.49375578168362627, "grad_norm": 3.570204019546509, "learning_rate": 5.344298766484382e-06, "loss": 0.9465, "step": 6405 }, { "epoch": 0.49383287079864324, "grad_norm": 3.7908997535705566, "learning_rate": 5.343053271893919e-06, "loss": 1.0456, "step": 6406 }, { "epoch": 0.4939099599136602, "grad_norm": 3.9499940872192383, "learning_rate": 5.3418077559158575e-06, "loss": 1.0423, "step": 6407 
}, { "epoch": 0.4939870490286771, "grad_norm": 3.3631200790405273, "learning_rate": 5.340562218627854e-06, "loss": 0.9541, "step": 6408 }, { "epoch": 0.4940641381436941, "grad_norm": 3.548306941986084, "learning_rate": 5.339316660107561e-06, "loss": 0.9153, "step": 6409 }, { "epoch": 0.49414122725871107, "grad_norm": 3.9176387786865234, "learning_rate": 5.3380710804326304e-06, "loss": 0.9603, "step": 6410 }, { "epoch": 0.49421831637372804, "grad_norm": 4.336452484130859, "learning_rate": 5.33682547968072e-06, "loss": 1.0792, "step": 6411 }, { "epoch": 0.494295405488745, "grad_norm": 3.4913458824157715, "learning_rate": 5.3355798579294834e-06, "loss": 0.9811, "step": 6412 }, { "epoch": 0.4943724946037619, "grad_norm": 3.5579373836517334, "learning_rate": 5.334334215256582e-06, "loss": 0.9181, "step": 6413 }, { "epoch": 0.4944495837187789, "grad_norm": 3.506436586380005, "learning_rate": 5.333088551739674e-06, "loss": 0.9107, "step": 6414 }, { "epoch": 0.49452667283379587, "grad_norm": 3.682651996612549, "learning_rate": 5.3318428674564196e-06, "loss": 0.989, "step": 6415 }, { "epoch": 0.49460376194881284, "grad_norm": 3.929532527923584, "learning_rate": 5.330597162484481e-06, "loss": 0.8853, "step": 6416 }, { "epoch": 0.4946808510638298, "grad_norm": 3.4837441444396973, "learning_rate": 5.329351436901522e-06, "loss": 0.904, "step": 6417 }, { "epoch": 0.4947579401788467, "grad_norm": 3.567117691040039, "learning_rate": 5.3281056907852054e-06, "loss": 0.9982, "step": 6418 }, { "epoch": 0.4948350292938637, "grad_norm": 3.655592918395996, "learning_rate": 5.3268599242132e-06, "loss": 0.9322, "step": 6419 }, { "epoch": 0.49491211840888066, "grad_norm": 3.478694200515747, "learning_rate": 5.32561413726317e-06, "loss": 0.9695, "step": 6420 }, { "epoch": 0.49498920752389763, "grad_norm": 3.512316942214966, "learning_rate": 5.324368330012785e-06, "loss": 0.9506, "step": 6421 }, { "epoch": 0.4950662966389146, "grad_norm": 3.5826921463012695, "learning_rate": 5.323122502539715e-06, "loss": 0.9291, "step": 6422 }, { "epoch": 0.4951433857539315, "grad_norm": 3.367875337600708, "learning_rate": 5.32187665492163e-06, "loss": 1.0068, "step": 6423 }, { "epoch": 0.4952204748689485, "grad_norm": 3.737788200378418, "learning_rate": 5.320630787236203e-06, "loss": 1.0055, "step": 6424 }, { "epoch": 0.49529756398396546, "grad_norm": 3.3039023876190186, "learning_rate": 5.3193848995611075e-06, "loss": 1.0002, "step": 6425 }, { "epoch": 0.49537465309898243, "grad_norm": 3.7930006980895996, "learning_rate": 5.318138991974016e-06, "loss": 1.1429, "step": 6426 }, { "epoch": 0.4954517422139994, "grad_norm": 3.4855892658233643, "learning_rate": 5.316893064552607e-06, "loss": 0.8802, "step": 6427 }, { "epoch": 0.4955288313290163, "grad_norm": 3.870096206665039, "learning_rate": 5.315647117374556e-06, "loss": 1.0267, "step": 6428 }, { "epoch": 0.4956059204440333, "grad_norm": 3.2889981269836426, "learning_rate": 5.314401150517543e-06, "loss": 0.9023, "step": 6429 }, { "epoch": 0.49568300955905026, "grad_norm": 3.54913330078125, "learning_rate": 5.313155164059247e-06, "loss": 1.023, "step": 6430 }, { "epoch": 0.49576009867406723, "grad_norm": 3.7795615196228027, "learning_rate": 5.311909158077347e-06, "loss": 1.0105, "step": 6431 }, { "epoch": 0.4958371877890842, "grad_norm": 3.661637306213379, "learning_rate": 5.310663132649526e-06, "loss": 1.0102, "step": 6432 }, { "epoch": 0.4959142769041011, "grad_norm": 3.3759381771087646, "learning_rate": 5.30941708785347e-06, "loss": 0.88, "step": 6433 }, { "epoch": 
0.4959913660191181, "grad_norm": 3.458970546722412, "learning_rate": 5.3081710237668595e-06, "loss": 0.9249, "step": 6434 }, { "epoch": 0.49606845513413506, "grad_norm": 3.838961362838745, "learning_rate": 5.306924940467383e-06, "loss": 1.082, "step": 6435 }, { "epoch": 0.496145544249152, "grad_norm": 3.4712164402008057, "learning_rate": 5.3056788380327255e-06, "loss": 0.8727, "step": 6436 }, { "epoch": 0.496222633364169, "grad_norm": 3.2734153270721436, "learning_rate": 5.304432716540574e-06, "loss": 0.8984, "step": 6437 }, { "epoch": 0.4962997224791859, "grad_norm": 3.8021316528320312, "learning_rate": 5.303186576068621e-06, "loss": 1.051, "step": 6438 }, { "epoch": 0.4963768115942029, "grad_norm": 3.4965784549713135, "learning_rate": 5.301940416694554e-06, "loss": 0.8909, "step": 6439 }, { "epoch": 0.49645390070921985, "grad_norm": 3.5403904914855957, "learning_rate": 5.300694238496067e-06, "loss": 0.9986, "step": 6440 }, { "epoch": 0.4965309898242368, "grad_norm": 3.335237503051758, "learning_rate": 5.299448041550852e-06, "loss": 0.9258, "step": 6441 }, { "epoch": 0.4966080789392538, "grad_norm": 3.8967440128326416, "learning_rate": 5.2982018259366004e-06, "loss": 1.0064, "step": 6442 }, { "epoch": 0.4966851680542707, "grad_norm": 3.5903615951538086, "learning_rate": 5.296955591731012e-06, "loss": 0.9383, "step": 6443 }, { "epoch": 0.4967622571692877, "grad_norm": 3.501309871673584, "learning_rate": 5.295709339011779e-06, "loss": 0.895, "step": 6444 }, { "epoch": 0.49683934628430465, "grad_norm": 3.6556556224823, "learning_rate": 5.2944630678566e-06, "loss": 0.9801, "step": 6445 }, { "epoch": 0.4969164353993216, "grad_norm": 3.3892955780029297, "learning_rate": 5.293216778343175e-06, "loss": 0.8553, "step": 6446 }, { "epoch": 0.4969935245143386, "grad_norm": 3.6636784076690674, "learning_rate": 5.2919704705492e-06, "loss": 0.9744, "step": 6447 }, { "epoch": 0.49707061362935556, "grad_norm": 3.7394680976867676, "learning_rate": 5.290724144552379e-06, "loss": 0.9662, "step": 6448 }, { "epoch": 0.4971477027443725, "grad_norm": 3.7077627182006836, "learning_rate": 5.2894778004304146e-06, "loss": 0.8284, "step": 6449 }, { "epoch": 0.49722479185938945, "grad_norm": 3.826967716217041, "learning_rate": 5.288231438261008e-06, "loss": 0.8625, "step": 6450 }, { "epoch": 0.4973018809744064, "grad_norm": 3.9576635360717773, "learning_rate": 5.286985058121865e-06, "loss": 0.9651, "step": 6451 }, { "epoch": 0.4973789700894234, "grad_norm": 3.6830098628997803, "learning_rate": 5.285738660090688e-06, "loss": 1.1414, "step": 6452 }, { "epoch": 0.49745605920444036, "grad_norm": 3.410430908203125, "learning_rate": 5.284492244245187e-06, "loss": 0.8979, "step": 6453 }, { "epoch": 0.4975331483194573, "grad_norm": 3.495670795440674, "learning_rate": 5.283245810663068e-06, "loss": 0.8779, "step": 6454 }, { "epoch": 0.49761023743447425, "grad_norm": 3.549842596054077, "learning_rate": 5.281999359422039e-06, "loss": 0.8665, "step": 6455 }, { "epoch": 0.4976873265494912, "grad_norm": 3.7550711631774902, "learning_rate": 5.28075289059981e-06, "loss": 0.9765, "step": 6456 }, { "epoch": 0.4977644156645082, "grad_norm": 3.494971990585327, "learning_rate": 5.279506404274094e-06, "loss": 0.9499, "step": 6457 }, { "epoch": 0.49784150477952516, "grad_norm": 3.89752197265625, "learning_rate": 5.2782599005226e-06, "loss": 1.0034, "step": 6458 }, { "epoch": 0.49791859389454207, "grad_norm": 3.6488029956817627, "learning_rate": 5.277013379423043e-06, "loss": 1.037, "step": 6459 }, { "epoch": 0.49799568300955904, 
"grad_norm": 3.672584295272827, "learning_rate": 5.275766841053136e-06, "loss": 1.0282, "step": 6460 }, { "epoch": 0.498072772124576, "grad_norm": 3.7023403644561768, "learning_rate": 5.2745202854905946e-06, "loss": 0.9883, "step": 6461 }, { "epoch": 0.498149861239593, "grad_norm": 3.654310941696167, "learning_rate": 5.273273712813135e-06, "loss": 0.9355, "step": 6462 }, { "epoch": 0.49822695035460995, "grad_norm": 3.7347729206085205, "learning_rate": 5.272027123098475e-06, "loss": 1.0296, "step": 6463 }, { "epoch": 0.49830403946962687, "grad_norm": 3.6972110271453857, "learning_rate": 5.2707805164243335e-06, "loss": 0.8939, "step": 6464 }, { "epoch": 0.49838112858464384, "grad_norm": 3.7917075157165527, "learning_rate": 5.269533892868428e-06, "loss": 0.959, "step": 6465 }, { "epoch": 0.4984582176996608, "grad_norm": 3.710469961166382, "learning_rate": 5.268287252508481e-06, "loss": 1.0708, "step": 6466 }, { "epoch": 0.4985353068146778, "grad_norm": 3.4452409744262695, "learning_rate": 5.2670405954222134e-06, "loss": 0.9258, "step": 6467 }, { "epoch": 0.49861239592969475, "grad_norm": 3.4874088764190674, "learning_rate": 5.265793921687347e-06, "loss": 0.9412, "step": 6468 }, { "epoch": 0.49868948504471167, "grad_norm": 3.5041942596435547, "learning_rate": 5.264547231381606e-06, "loss": 0.8923, "step": 6469 }, { "epoch": 0.49876657415972864, "grad_norm": 3.852966785430908, "learning_rate": 5.263300524582717e-06, "loss": 1.0018, "step": 6470 }, { "epoch": 0.4988436632747456, "grad_norm": 3.813434600830078, "learning_rate": 5.2620538013684005e-06, "loss": 1.0497, "step": 6471 }, { "epoch": 0.4989207523897626, "grad_norm": 3.9356420040130615, "learning_rate": 5.26080706181639e-06, "loss": 0.9555, "step": 6472 }, { "epoch": 0.49899784150477955, "grad_norm": 3.891258716583252, "learning_rate": 5.259560306004409e-06, "loss": 0.9037, "step": 6473 }, { "epoch": 0.49907493061979646, "grad_norm": 3.454547643661499, "learning_rate": 5.258313534010187e-06, "loss": 0.8702, "step": 6474 }, { "epoch": 0.49915201973481343, "grad_norm": 3.695034980773926, "learning_rate": 5.257066745911454e-06, "loss": 0.8333, "step": 6475 }, { "epoch": 0.4992291088498304, "grad_norm": 3.522186040878296, "learning_rate": 5.2558199417859415e-06, "loss": 0.948, "step": 6476 }, { "epoch": 0.4993061979648474, "grad_norm": 3.7446117401123047, "learning_rate": 5.25457312171138e-06, "loss": 0.9535, "step": 6477 }, { "epoch": 0.49938328707986435, "grad_norm": 3.8174216747283936, "learning_rate": 5.253326285765502e-06, "loss": 0.9085, "step": 6478 }, { "epoch": 0.49946037619488126, "grad_norm": 3.598435640335083, "learning_rate": 5.252079434026043e-06, "loss": 0.9694, "step": 6479 }, { "epoch": 0.49953746530989823, "grad_norm": 4.055591583251953, "learning_rate": 5.250832566570736e-06, "loss": 0.998, "step": 6480 }, { "epoch": 0.4996145544249152, "grad_norm": 3.655466318130493, "learning_rate": 5.2495856834773195e-06, "loss": 0.8998, "step": 6481 }, { "epoch": 0.4996916435399322, "grad_norm": 3.346543788909912, "learning_rate": 5.248338784823526e-06, "loss": 0.9427, "step": 6482 }, { "epoch": 0.49976873265494914, "grad_norm": 3.703274965286255, "learning_rate": 5.2470918706870975e-06, "loss": 0.8559, "step": 6483 }, { "epoch": 0.49984582176996606, "grad_norm": 3.716444492340088, "learning_rate": 5.245844941145769e-06, "loss": 1.017, "step": 6484 }, { "epoch": 0.49992291088498303, "grad_norm": 3.966560125350952, "learning_rate": 5.244597996277282e-06, "loss": 0.8255, "step": 6485 }, { "epoch": 0.5, "grad_norm": 3.69476318359375, 
"learning_rate": 5.243351036159377e-06, "loss": 0.8626, "step": 6486 }, { "epoch": 0.5000770891150169, "grad_norm": 3.7048497200012207, "learning_rate": 5.242104060869796e-06, "loss": 0.9411, "step": 6487 }, { "epoch": 0.5001541782300339, "grad_norm": 3.5426058769226074, "learning_rate": 5.2408570704862795e-06, "loss": 0.9875, "step": 6488 }, { "epoch": 0.5002312673450509, "grad_norm": 3.579333782196045, "learning_rate": 5.239610065086574e-06, "loss": 1.0032, "step": 6489 }, { "epoch": 0.5003083564600679, "grad_norm": 3.5092763900756836, "learning_rate": 5.238363044748419e-06, "loss": 0.8675, "step": 6490 }, { "epoch": 0.5003854455750848, "grad_norm": 4.748165130615234, "learning_rate": 5.237116009549565e-06, "loss": 1.0171, "step": 6491 }, { "epoch": 0.5004625346901017, "grad_norm": 3.549006938934326, "learning_rate": 5.235868959567755e-06, "loss": 0.968, "step": 6492 }, { "epoch": 0.5005396238051187, "grad_norm": 4.191278457641602, "learning_rate": 5.2346218948807345e-06, "loss": 0.9313, "step": 6493 }, { "epoch": 0.5006167129201357, "grad_norm": 3.531649589538574, "learning_rate": 5.233374815566258e-06, "loss": 0.8907, "step": 6494 }, { "epoch": 0.5006938020351527, "grad_norm": 3.5904295444488525, "learning_rate": 5.232127721702069e-06, "loss": 0.9111, "step": 6495 }, { "epoch": 0.5007708911501696, "grad_norm": 3.7466869354248047, "learning_rate": 5.230880613365918e-06, "loss": 0.9371, "step": 6496 }, { "epoch": 0.5008479802651865, "grad_norm": 3.7517306804656982, "learning_rate": 5.229633490635558e-06, "loss": 1.0557, "step": 6497 }, { "epoch": 0.5009250693802035, "grad_norm": 3.6371867656707764, "learning_rate": 5.228386353588737e-06, "loss": 1.0518, "step": 6498 }, { "epoch": 0.5010021584952205, "grad_norm": 3.3489437103271484, "learning_rate": 5.2271392023032115e-06, "loss": 0.9757, "step": 6499 }, { "epoch": 0.5010792476102375, "grad_norm": 3.531639575958252, "learning_rate": 5.225892036856734e-06, "loss": 0.9355, "step": 6500 }, { "epoch": 0.5011563367252544, "grad_norm": 3.9964072704315186, "learning_rate": 5.224644857327055e-06, "loss": 0.9836, "step": 6501 }, { "epoch": 0.5012334258402713, "grad_norm": 3.8607258796691895, "learning_rate": 5.223397663791935e-06, "loss": 1.0546, "step": 6502 }, { "epoch": 0.5013105149552883, "grad_norm": 3.4289846420288086, "learning_rate": 5.222150456329127e-06, "loss": 0.8784, "step": 6503 }, { "epoch": 0.5013876040703052, "grad_norm": 3.9486873149871826, "learning_rate": 5.220903235016388e-06, "loss": 1.0408, "step": 6504 }, { "epoch": 0.5014646931853223, "grad_norm": 3.4474892616271973, "learning_rate": 5.2196559999314765e-06, "loss": 0.8746, "step": 6505 }, { "epoch": 0.5015417823003392, "grad_norm": 3.749389171600342, "learning_rate": 5.218408751152152e-06, "loss": 1.0857, "step": 6506 }, { "epoch": 0.5016188714153561, "grad_norm": 3.59549880027771, "learning_rate": 5.217161488756172e-06, "loss": 0.9574, "step": 6507 }, { "epoch": 0.5016959605303731, "grad_norm": 3.458582878112793, "learning_rate": 5.215914212821298e-06, "loss": 0.9595, "step": 6508 }, { "epoch": 0.50177304964539, "grad_norm": 3.547104835510254, "learning_rate": 5.214666923425291e-06, "loss": 0.9317, "step": 6509 }, { "epoch": 0.5018501387604071, "grad_norm": 3.983787775039673, "learning_rate": 5.213419620645914e-06, "loss": 1.096, "step": 6510 }, { "epoch": 0.501927227875424, "grad_norm": 3.5246825218200684, "learning_rate": 5.212172304560928e-06, "loss": 0.9323, "step": 6511 }, { "epoch": 0.5020043169904409, "grad_norm": 3.895063877105713, "learning_rate": 
5.2109249752480985e-06, "loss": 0.9427, "step": 6512 }, { "epoch": 0.5020814061054579, "grad_norm": 4.591753005981445, "learning_rate": 5.20967763278519e-06, "loss": 1.0114, "step": 6513 }, { "epoch": 0.5021584952204748, "grad_norm": 4.11314582824707, "learning_rate": 5.208430277249965e-06, "loss": 1.0685, "step": 6514 }, { "epoch": 0.5022355843354919, "grad_norm": 3.453099012374878, "learning_rate": 5.207182908720192e-06, "loss": 0.9276, "step": 6515 }, { "epoch": 0.5023126734505088, "grad_norm": 3.538799285888672, "learning_rate": 5.205935527273638e-06, "loss": 0.9903, "step": 6516 }, { "epoch": 0.5023897625655257, "grad_norm": 3.893820285797119, "learning_rate": 5.204688132988071e-06, "loss": 0.8881, "step": 6517 }, { "epoch": 0.5024668516805427, "grad_norm": 4.181878089904785, "learning_rate": 5.203440725941259e-06, "loss": 0.9407, "step": 6518 }, { "epoch": 0.5025439407955596, "grad_norm": 3.6423377990722656, "learning_rate": 5.2021933062109705e-06, "loss": 1.031, "step": 6519 }, { "epoch": 0.5026210299105767, "grad_norm": 3.723810911178589, "learning_rate": 5.200945873874979e-06, "loss": 1.0118, "step": 6520 }, { "epoch": 0.5026981190255936, "grad_norm": 3.465095043182373, "learning_rate": 5.19969842901105e-06, "loss": 0.942, "step": 6521 }, { "epoch": 0.5027752081406105, "grad_norm": 3.6051387786865234, "learning_rate": 5.198450971696959e-06, "loss": 0.8803, "step": 6522 }, { "epoch": 0.5028522972556275, "grad_norm": 4.09804630279541, "learning_rate": 5.197203502010478e-06, "loss": 0.8432, "step": 6523 }, { "epoch": 0.5029293863706444, "grad_norm": 4.076051712036133, "learning_rate": 5.19595602002938e-06, "loss": 0.9762, "step": 6524 }, { "epoch": 0.5030064754856615, "grad_norm": 3.7992618083953857, "learning_rate": 5.194708525831439e-06, "loss": 0.8912, "step": 6525 }, { "epoch": 0.5030835646006784, "grad_norm": 3.59840989112854, "learning_rate": 5.1934610194944306e-06, "loss": 0.874, "step": 6526 }, { "epoch": 0.5031606537156953, "grad_norm": 3.5978050231933594, "learning_rate": 5.192213501096129e-06, "loss": 0.9612, "step": 6527 }, { "epoch": 0.5032377428307123, "grad_norm": 3.7155649662017822, "learning_rate": 5.1909659707143105e-06, "loss": 1.042, "step": 6528 }, { "epoch": 0.5033148319457292, "grad_norm": 3.7677228450775146, "learning_rate": 5.189718428426753e-06, "loss": 1.0446, "step": 6529 }, { "epoch": 0.5033919210607463, "grad_norm": 3.759453773498535, "learning_rate": 5.188470874311234e-06, "loss": 1.0825, "step": 6530 }, { "epoch": 0.5034690101757632, "grad_norm": 3.622924327850342, "learning_rate": 5.187223308445534e-06, "loss": 0.9703, "step": 6531 }, { "epoch": 0.5035460992907801, "grad_norm": 3.712109088897705, "learning_rate": 5.185975730907428e-06, "loss": 1.0215, "step": 6532 }, { "epoch": 0.5036231884057971, "grad_norm": 3.5897741317749023, "learning_rate": 5.184728141774699e-06, "loss": 0.919, "step": 6533 }, { "epoch": 0.503700277520814, "grad_norm": 3.6697773933410645, "learning_rate": 5.183480541125128e-06, "loss": 1.0769, "step": 6534 }, { "epoch": 0.5037773666358311, "grad_norm": 3.3042874336242676, "learning_rate": 5.182232929036495e-06, "loss": 0.9561, "step": 6535 }, { "epoch": 0.503854455750848, "grad_norm": 3.4754981994628906, "learning_rate": 5.180985305586581e-06, "loss": 0.9105, "step": 6536 }, { "epoch": 0.5039315448658649, "grad_norm": 3.574561834335327, "learning_rate": 5.179737670853173e-06, "loss": 1.0152, "step": 6537 }, { "epoch": 0.5040086339808819, "grad_norm": 3.8162965774536133, "learning_rate": 5.17849002491405e-06, "loss": 0.8418, 
"step": 6538 }, { "epoch": 0.5040857230958988, "grad_norm": 3.736490249633789, "learning_rate": 5.177242367846999e-06, "loss": 1.0162, "step": 6539 }, { "epoch": 0.5041628122109159, "grad_norm": 3.601933240890503, "learning_rate": 5.175994699729806e-06, "loss": 0.9982, "step": 6540 }, { "epoch": 0.5042399013259328, "grad_norm": 4.0474443435668945, "learning_rate": 5.174747020640253e-06, "loss": 1.0384, "step": 6541 }, { "epoch": 0.5043169904409497, "grad_norm": 3.8889949321746826, "learning_rate": 5.17349933065613e-06, "loss": 0.8445, "step": 6542 }, { "epoch": 0.5043940795559667, "grad_norm": 3.7362418174743652, "learning_rate": 5.1722516298552206e-06, "loss": 0.9535, "step": 6543 }, { "epoch": 0.5044711686709836, "grad_norm": 3.572842597961426, "learning_rate": 5.171003918315316e-06, "loss": 0.9203, "step": 6544 }, { "epoch": 0.5045482577860007, "grad_norm": 3.86114501953125, "learning_rate": 5.169756196114202e-06, "loss": 0.9648, "step": 6545 }, { "epoch": 0.5046253469010176, "grad_norm": 3.9800467491149902, "learning_rate": 5.1685084633296665e-06, "loss": 0.9466, "step": 6546 }, { "epoch": 0.5047024360160345, "grad_norm": 3.3460676670074463, "learning_rate": 5.167260720039504e-06, "loss": 0.8801, "step": 6547 }, { "epoch": 0.5047795251310515, "grad_norm": 3.8302457332611084, "learning_rate": 5.1660129663215e-06, "loss": 1.0093, "step": 6548 }, { "epoch": 0.5048566142460684, "grad_norm": 4.078864574432373, "learning_rate": 5.164765202253448e-06, "loss": 1.0053, "step": 6549 }, { "epoch": 0.5049337033610855, "grad_norm": 3.9565751552581787, "learning_rate": 5.163517427913139e-06, "loss": 1.0029, "step": 6550 }, { "epoch": 0.5050107924761024, "grad_norm": 3.460613250732422, "learning_rate": 5.162269643378365e-06, "loss": 0.9372, "step": 6551 }, { "epoch": 0.5050878815911193, "grad_norm": 3.854912757873535, "learning_rate": 5.161021848726919e-06, "loss": 0.9244, "step": 6552 }, { "epoch": 0.5051649707061363, "grad_norm": 3.741788148880005, "learning_rate": 5.159774044036595e-06, "loss": 0.9823, "step": 6553 }, { "epoch": 0.5052420598211532, "grad_norm": 3.4191572666168213, "learning_rate": 5.1585262293851865e-06, "loss": 0.9462, "step": 6554 }, { "epoch": 0.5053191489361702, "grad_norm": 3.4478116035461426, "learning_rate": 5.1572784048504894e-06, "loss": 0.9788, "step": 6555 }, { "epoch": 0.5053962380511872, "grad_norm": 3.257162094116211, "learning_rate": 5.156030570510298e-06, "loss": 0.8265, "step": 6556 }, { "epoch": 0.5054733271662041, "grad_norm": 3.8727734088897705, "learning_rate": 5.154782726442409e-06, "loss": 0.9721, "step": 6557 }, { "epoch": 0.5055504162812211, "grad_norm": 3.7539451122283936, "learning_rate": 5.153534872724618e-06, "loss": 0.9972, "step": 6558 }, { "epoch": 0.505627505396238, "grad_norm": 3.6078336238861084, "learning_rate": 5.152287009434723e-06, "loss": 1.0208, "step": 6559 }, { "epoch": 0.505704594511255, "grad_norm": 3.6347334384918213, "learning_rate": 5.1510391366505204e-06, "loss": 0.9033, "step": 6560 }, { "epoch": 0.505781683626272, "grad_norm": 3.9092540740966797, "learning_rate": 5.149791254449812e-06, "loss": 0.9415, "step": 6561 }, { "epoch": 0.5058587727412889, "grad_norm": 3.516451597213745, "learning_rate": 5.148543362910393e-06, "loss": 1.0037, "step": 6562 }, { "epoch": 0.5059358618563059, "grad_norm": 4.396605968475342, "learning_rate": 5.147295462110066e-06, "loss": 1.0104, "step": 6563 }, { "epoch": 0.5060129509713228, "grad_norm": 4.63103723526001, "learning_rate": 5.14604755212663e-06, "loss": 1.0168, "step": 6564 }, { "epoch": 
0.5060900400863398, "grad_norm": 3.5548949241638184, "learning_rate": 5.144799633037884e-06, "loss": 0.8663, "step": 6565 }, { "epoch": 0.5061671292013568, "grad_norm": 3.333975315093994, "learning_rate": 5.143551704921632e-06, "loss": 0.8946, "step": 6566 }, { "epoch": 0.5062442183163737, "grad_norm": 4.707441806793213, "learning_rate": 5.142303767855674e-06, "loss": 0.9074, "step": 6567 }, { "epoch": 0.5063213074313907, "grad_norm": 3.944218397140503, "learning_rate": 5.141055821917814e-06, "loss": 0.9857, "step": 6568 }, { "epoch": 0.5063983965464076, "grad_norm": 3.700016975402832, "learning_rate": 5.139807867185853e-06, "loss": 0.9898, "step": 6569 }, { "epoch": 0.5064754856614246, "grad_norm": 3.7743771076202393, "learning_rate": 5.138559903737596e-06, "loss": 0.9928, "step": 6570 }, { "epoch": 0.5065525747764416, "grad_norm": 3.386918067932129, "learning_rate": 5.137311931650847e-06, "loss": 0.9561, "step": 6571 }, { "epoch": 0.5066296638914586, "grad_norm": 3.41721510887146, "learning_rate": 5.136063951003409e-06, "loss": 0.9306, "step": 6572 }, { "epoch": 0.5067067530064755, "grad_norm": 3.741901397705078, "learning_rate": 5.134815961873089e-06, "loss": 1.0163, "step": 6573 }, { "epoch": 0.5067838421214924, "grad_norm": 3.600267171859741, "learning_rate": 5.133567964337693e-06, "loss": 1.0534, "step": 6574 }, { "epoch": 0.5068609312365094, "grad_norm": 3.748769760131836, "learning_rate": 5.132319958475025e-06, "loss": 0.8298, "step": 6575 }, { "epoch": 0.5069380203515264, "grad_norm": 3.435581684112549, "learning_rate": 5.131071944362893e-06, "loss": 0.932, "step": 6576 }, { "epoch": 0.5070151094665434, "grad_norm": 3.6183292865753174, "learning_rate": 5.129823922079105e-06, "loss": 0.9721, "step": 6577 }, { "epoch": 0.5070921985815603, "grad_norm": 3.3050825595855713, "learning_rate": 5.128575891701467e-06, "loss": 0.9811, "step": 6578 }, { "epoch": 0.5071692876965772, "grad_norm": 3.783656120300293, "learning_rate": 5.127327853307788e-06, "loss": 0.8849, "step": 6579 }, { "epoch": 0.5072463768115942, "grad_norm": 3.649453639984131, "learning_rate": 5.126079806975877e-06, "loss": 1.0261, "step": 6580 }, { "epoch": 0.5073234659266112, "grad_norm": 3.476015567779541, "learning_rate": 5.124831752783543e-06, "loss": 0.9128, "step": 6581 }, { "epoch": 0.5074005550416282, "grad_norm": 3.546862840652466, "learning_rate": 5.123583690808596e-06, "loss": 1.0003, "step": 6582 }, { "epoch": 0.5074776441566451, "grad_norm": 3.592933177947998, "learning_rate": 5.122335621128844e-06, "loss": 0.9616, "step": 6583 }, { "epoch": 0.507554733271662, "grad_norm": 3.251722812652588, "learning_rate": 5.121087543822103e-06, "loss": 0.9076, "step": 6584 }, { "epoch": 0.507631822386679, "grad_norm": 3.8498170375823975, "learning_rate": 5.119839458966179e-06, "loss": 0.9246, "step": 6585 }, { "epoch": 0.507708911501696, "grad_norm": 3.848869800567627, "learning_rate": 5.118591366638885e-06, "loss": 0.9279, "step": 6586 }, { "epoch": 0.507786000616713, "grad_norm": 3.7381491661071777, "learning_rate": 5.117343266918035e-06, "loss": 0.9364, "step": 6587 }, { "epoch": 0.5078630897317299, "grad_norm": 3.8815040588378906, "learning_rate": 5.116095159881438e-06, "loss": 0.9837, "step": 6588 }, { "epoch": 0.5079401788467468, "grad_norm": 3.340867519378662, "learning_rate": 5.11484704560691e-06, "loss": 0.9044, "step": 6589 }, { "epoch": 0.5080172679617638, "grad_norm": 3.5740439891815186, "learning_rate": 5.113598924172264e-06, "loss": 0.9594, "step": 6590 }, { "epoch": 0.5080943570767807, "grad_norm": 
3.547769784927368, "learning_rate": 5.112350795655313e-06, "loss": 0.8794, "step": 6591 }, { "epoch": 0.5081714461917978, "grad_norm": 3.2799148559570312, "learning_rate": 5.1111026601338735e-06, "loss": 0.9809, "step": 6592 }, { "epoch": 0.5082485353068147, "grad_norm": 3.7307305335998535, "learning_rate": 5.109854517685756e-06, "loss": 0.9173, "step": 6593 }, { "epoch": 0.5083256244218316, "grad_norm": 3.4130656719207764, "learning_rate": 5.108606368388779e-06, "loss": 0.9393, "step": 6594 }, { "epoch": 0.5084027135368486, "grad_norm": 3.4056098461151123, "learning_rate": 5.107358212320758e-06, "loss": 0.958, "step": 6595 }, { "epoch": 0.5084798026518655, "grad_norm": 3.6553897857666016, "learning_rate": 5.106110049559507e-06, "loss": 0.9729, "step": 6596 }, { "epoch": 0.5085568917668826, "grad_norm": 4.16010046005249, "learning_rate": 5.1048618801828454e-06, "loss": 0.9938, "step": 6597 }, { "epoch": 0.5086339808818995, "grad_norm": 3.6851933002471924, "learning_rate": 5.1036137042685885e-06, "loss": 0.9174, "step": 6598 }, { "epoch": 0.5087110699969164, "grad_norm": 3.8315787315368652, "learning_rate": 5.1023655218945534e-06, "loss": 0.9586, "step": 6599 }, { "epoch": 0.5087881591119334, "grad_norm": 3.3780720233917236, "learning_rate": 5.101117333138558e-06, "loss": 0.891, "step": 6600 }, { "epoch": 0.5088652482269503, "grad_norm": 3.6677708625793457, "learning_rate": 5.099869138078421e-06, "loss": 0.924, "step": 6601 }, { "epoch": 0.5089423373419674, "grad_norm": 3.382268190383911, "learning_rate": 5.09862093679196e-06, "loss": 0.9927, "step": 6602 }, { "epoch": 0.5090194264569843, "grad_norm": 3.3687045574188232, "learning_rate": 5.097372729356997e-06, "loss": 0.9537, "step": 6603 }, { "epoch": 0.5090965155720012, "grad_norm": 3.567380905151367, "learning_rate": 5.096124515851344e-06, "loss": 0.9901, "step": 6604 }, { "epoch": 0.5091736046870182, "grad_norm": 3.5415704250335693, "learning_rate": 5.094876296352829e-06, "loss": 1.0561, "step": 6605 }, { "epoch": 0.5092506938020351, "grad_norm": 4.186346530914307, "learning_rate": 5.093628070939266e-06, "loss": 1.0447, "step": 6606 }, { "epoch": 0.5093277829170522, "grad_norm": 3.475534677505493, "learning_rate": 5.0923798396884785e-06, "loss": 0.9391, "step": 6607 }, { "epoch": 0.5094048720320691, "grad_norm": 3.3865292072296143, "learning_rate": 5.0911316026782865e-06, "loss": 1.018, "step": 6608 }, { "epoch": 0.509481961147086, "grad_norm": 3.4238085746765137, "learning_rate": 5.089883359986512e-06, "loss": 0.9044, "step": 6609 }, { "epoch": 0.509559050262103, "grad_norm": 3.9409427642822266, "learning_rate": 5.088635111690974e-06, "loss": 0.9538, "step": 6610 }, { "epoch": 0.5096361393771199, "grad_norm": 3.5488216876983643, "learning_rate": 5.087386857869496e-06, "loss": 0.9652, "step": 6611 }, { "epoch": 0.509713228492137, "grad_norm": 3.679537534713745, "learning_rate": 5.086138598599901e-06, "loss": 0.9817, "step": 6612 }, { "epoch": 0.5097903176071539, "grad_norm": 3.6628053188323975, "learning_rate": 5.08489033396001e-06, "loss": 1.0088, "step": 6613 }, { "epoch": 0.5098674067221708, "grad_norm": 3.9006776809692383, "learning_rate": 5.083642064027646e-06, "loss": 0.9371, "step": 6614 }, { "epoch": 0.5099444958371878, "grad_norm": 3.7066752910614014, "learning_rate": 5.082393788880633e-06, "loss": 0.8783, "step": 6615 }, { "epoch": 0.5100215849522047, "grad_norm": 3.3197736740112305, "learning_rate": 5.081145508596794e-06, "loss": 0.9278, "step": 6616 }, { "epoch": 0.5100986740672218, "grad_norm": 3.691556930541992, 
"learning_rate": 5.079897223253953e-06, "loss": 1.0941, "step": 6617 }, { "epoch": 0.5101757631822387, "grad_norm": 3.5036165714263916, "learning_rate": 5.078648932929933e-06, "loss": 0.903, "step": 6618 }, { "epoch": 0.5102528522972556, "grad_norm": 3.766944408416748, "learning_rate": 5.077400637702561e-06, "loss": 0.8997, "step": 6619 }, { "epoch": 0.5103299414122726, "grad_norm": 3.8213517665863037, "learning_rate": 5.076152337649658e-06, "loss": 0.955, "step": 6620 }, { "epoch": 0.5104070305272895, "grad_norm": 3.9807519912719727, "learning_rate": 5.074904032849052e-06, "loss": 0.9852, "step": 6621 }, { "epoch": 0.5104841196423066, "grad_norm": 3.3743574619293213, "learning_rate": 5.0736557233785685e-06, "loss": 0.8994, "step": 6622 }, { "epoch": 0.5105612087573235, "grad_norm": 3.691159963607788, "learning_rate": 5.072407409316031e-06, "loss": 1.0397, "step": 6623 }, { "epoch": 0.5106382978723404, "grad_norm": 3.5302302837371826, "learning_rate": 5.071159090739266e-06, "loss": 0.8242, "step": 6624 }, { "epoch": 0.5107153869873574, "grad_norm": 4.0092926025390625, "learning_rate": 5.069910767726103e-06, "loss": 0.9717, "step": 6625 }, { "epoch": 0.5107924761023743, "grad_norm": 4.246231555938721, "learning_rate": 5.068662440354362e-06, "loss": 1.1298, "step": 6626 }, { "epoch": 0.5108695652173914, "grad_norm": 3.4625141620635986, "learning_rate": 5.067414108701876e-06, "loss": 0.9316, "step": 6627 }, { "epoch": 0.5109466543324083, "grad_norm": 3.577906847000122, "learning_rate": 5.066165772846468e-06, "loss": 0.9469, "step": 6628 }, { "epoch": 0.5110237434474252, "grad_norm": 3.831422805786133, "learning_rate": 5.064917432865968e-06, "loss": 0.9549, "step": 6629 }, { "epoch": 0.5111008325624422, "grad_norm": 3.276904344558716, "learning_rate": 5.063669088838201e-06, "loss": 0.8982, "step": 6630 }, { "epoch": 0.5111779216774591, "grad_norm": 3.7430529594421387, "learning_rate": 5.0624207408409964e-06, "loss": 0.9587, "step": 6631 }, { "epoch": 0.5112550107924761, "grad_norm": 3.5662450790405273, "learning_rate": 5.061172388952184e-06, "loss": 0.8223, "step": 6632 }, { "epoch": 0.5113320999074931, "grad_norm": 4.065229892730713, "learning_rate": 5.059924033249587e-06, "loss": 0.9091, "step": 6633 }, { "epoch": 0.51140918902251, "grad_norm": 3.51495623588562, "learning_rate": 5.058675673811038e-06, "loss": 0.9732, "step": 6634 }, { "epoch": 0.511486278137527, "grad_norm": 3.6267058849334717, "learning_rate": 5.057427310714366e-06, "loss": 0.9541, "step": 6635 }, { "epoch": 0.5115633672525439, "grad_norm": 3.579072952270508, "learning_rate": 5.056178944037396e-06, "loss": 0.9344, "step": 6636 }, { "epoch": 0.511640456367561, "grad_norm": 3.7198750972747803, "learning_rate": 5.054930573857961e-06, "loss": 0.9487, "step": 6637 }, { "epoch": 0.5117175454825779, "grad_norm": 3.6153461933135986, "learning_rate": 5.05368220025389e-06, "loss": 0.9901, "step": 6638 }, { "epoch": 0.5117946345975948, "grad_norm": 3.899430513381958, "learning_rate": 5.052433823303011e-06, "loss": 0.9722, "step": 6639 }, { "epoch": 0.5118717237126118, "grad_norm": 3.687140703201294, "learning_rate": 5.0511854430831564e-06, "loss": 0.9444, "step": 6640 }, { "epoch": 0.5119488128276287, "grad_norm": 3.4240894317626953, "learning_rate": 5.0499370596721516e-06, "loss": 0.9509, "step": 6641 }, { "epoch": 0.5120259019426457, "grad_norm": 3.548834800720215, "learning_rate": 5.048688673147831e-06, "loss": 0.8858, "step": 6642 }, { "epoch": 0.5121029910576627, "grad_norm": 3.7235825061798096, "learning_rate": 
5.047440283588025e-06, "loss": 0.899, "step": 6643 }, { "epoch": 0.5121800801726796, "grad_norm": 3.849551200866699, "learning_rate": 5.0461918910705625e-06, "loss": 0.9537, "step": 6644 }, { "epoch": 0.5122571692876966, "grad_norm": 3.7150797843933105, "learning_rate": 5.044943495673273e-06, "loss": 0.9883, "step": 6645 }, { "epoch": 0.5123342584027135, "grad_norm": 4.125245571136475, "learning_rate": 5.043695097473992e-06, "loss": 0.7933, "step": 6646 }, { "epoch": 0.5124113475177305, "grad_norm": 3.6767773628234863, "learning_rate": 5.042446696550546e-06, "loss": 0.8519, "step": 6647 }, { "epoch": 0.5124884366327475, "grad_norm": 3.659404754638672, "learning_rate": 5.041198292980768e-06, "loss": 0.8724, "step": 6648 }, { "epoch": 0.5125655257477644, "grad_norm": 3.8711514472961426, "learning_rate": 5.03994988684249e-06, "loss": 0.8896, "step": 6649 }, { "epoch": 0.5126426148627814, "grad_norm": 4.328100681304932, "learning_rate": 5.038701478213545e-06, "loss": 0.8541, "step": 6650 }, { "epoch": 0.5127197039777983, "grad_norm": 4.055181503295898, "learning_rate": 5.037453067171763e-06, "loss": 0.9379, "step": 6651 }, { "epoch": 0.5127967930928153, "grad_norm": 3.8188493251800537, "learning_rate": 5.036204653794975e-06, "loss": 1.0316, "step": 6652 }, { "epoch": 0.5128738822078323, "grad_norm": 3.723499059677124, "learning_rate": 5.034956238161015e-06, "loss": 1.0024, "step": 6653 }, { "epoch": 0.5129509713228492, "grad_norm": 3.589024782180786, "learning_rate": 5.033707820347715e-06, "loss": 0.9118, "step": 6654 }, { "epoch": 0.5130280604378662, "grad_norm": 4.008199691772461, "learning_rate": 5.0324594004329065e-06, "loss": 0.956, "step": 6655 }, { "epoch": 0.5131051495528831, "grad_norm": 3.509976387023926, "learning_rate": 5.031210978494423e-06, "loss": 0.9386, "step": 6656 }, { "epoch": 0.5131822386679001, "grad_norm": 3.630739212036133, "learning_rate": 5.029962554610096e-06, "loss": 0.9486, "step": 6657 }, { "epoch": 0.513259327782917, "grad_norm": 3.7410812377929688, "learning_rate": 5.0287141288577575e-06, "loss": 0.933, "step": 6658 }, { "epoch": 0.513336416897934, "grad_norm": 3.538902997970581, "learning_rate": 5.027465701315245e-06, "loss": 0.9184, "step": 6659 }, { "epoch": 0.513413506012951, "grad_norm": 3.678654670715332, "learning_rate": 5.026217272060386e-06, "loss": 0.8957, "step": 6660 }, { "epoch": 0.5134905951279679, "grad_norm": 3.5913567543029785, "learning_rate": 5.024968841171016e-06, "loss": 1.0199, "step": 6661 }, { "epoch": 0.5135676842429849, "grad_norm": 3.574263334274292, "learning_rate": 5.023720408724971e-06, "loss": 0.9235, "step": 6662 }, { "epoch": 0.5136447733580018, "grad_norm": 4.819427490234375, "learning_rate": 5.022471974800077e-06, "loss": 0.8857, "step": 6663 }, { "epoch": 0.5137218624730188, "grad_norm": 3.4264743328094482, "learning_rate": 5.021223539474175e-06, "loss": 0.9493, "step": 6664 }, { "epoch": 0.5137989515880358, "grad_norm": 4.047597885131836, "learning_rate": 5.019975102825093e-06, "loss": 0.9116, "step": 6665 }, { "epoch": 0.5138760407030527, "grad_norm": 3.5010175704956055, "learning_rate": 5.018726664930667e-06, "loss": 0.9236, "step": 6666 }, { "epoch": 0.5139531298180697, "grad_norm": 3.643869638442993, "learning_rate": 5.017478225868733e-06, "loss": 0.945, "step": 6667 }, { "epoch": 0.5140302189330866, "grad_norm": 3.658156156539917, "learning_rate": 5.0162297857171196e-06, "loss": 0.8691, "step": 6668 }, { "epoch": 0.5141073080481036, "grad_norm": 3.7067670822143555, "learning_rate": 5.0149813445536634e-06, "loss": 0.9861, 
"step": 6669 }, { "epoch": 0.5141843971631206, "grad_norm": 3.7881758213043213, "learning_rate": 5.0137329024561985e-06, "loss": 0.9236, "step": 6670 }, { "epoch": 0.5142614862781375, "grad_norm": 3.391571521759033, "learning_rate": 5.012484459502558e-06, "loss": 0.9774, "step": 6671 }, { "epoch": 0.5143385753931545, "grad_norm": 3.8176701068878174, "learning_rate": 5.011236015770577e-06, "loss": 0.8668, "step": 6672 }, { "epoch": 0.5144156645081714, "grad_norm": 3.639432191848755, "learning_rate": 5.0099875713380875e-06, "loss": 0.879, "step": 6673 }, { "epoch": 0.5144927536231884, "grad_norm": 3.630164861679077, "learning_rate": 5.008739126282924e-06, "loss": 0.9275, "step": 6674 }, { "epoch": 0.5145698427382054, "grad_norm": 3.4726369380950928, "learning_rate": 5.007490680682924e-06, "loss": 0.9402, "step": 6675 }, { "epoch": 0.5146469318532223, "grad_norm": 4.002774715423584, "learning_rate": 5.006242234615916e-06, "loss": 0.9698, "step": 6676 }, { "epoch": 0.5147240209682393, "grad_norm": 3.349799871444702, "learning_rate": 5.004993788159739e-06, "loss": 0.913, "step": 6677 }, { "epoch": 0.5148011100832562, "grad_norm": 3.616485595703125, "learning_rate": 5.003745341392225e-06, "loss": 0.8706, "step": 6678 }, { "epoch": 0.5148781991982732, "grad_norm": 3.556090831756592, "learning_rate": 5.0024968943912055e-06, "loss": 0.9315, "step": 6679 }, { "epoch": 0.5149552883132902, "grad_norm": 3.7224857807159424, "learning_rate": 5.001248447234521e-06, "loss": 0.8919, "step": 6680 }, { "epoch": 0.5150323774283071, "grad_norm": 3.654167890548706, "learning_rate": 5e-06, "loss": 0.8902, "step": 6681 }, { "epoch": 0.5151094665433241, "grad_norm": 4.025610446929932, "learning_rate": 4.99875155276548e-06, "loss": 0.9718, "step": 6682 }, { "epoch": 0.515186555658341, "grad_norm": 3.4189112186431885, "learning_rate": 4.997503105608795e-06, "loss": 0.9592, "step": 6683 }, { "epoch": 0.515263644773358, "grad_norm": 3.5572469234466553, "learning_rate": 4.996254658607778e-06, "loss": 0.9258, "step": 6684 }, { "epoch": 0.515340733888375, "grad_norm": 3.8865630626678467, "learning_rate": 4.995006211840263e-06, "loss": 0.9746, "step": 6685 }, { "epoch": 0.5154178230033919, "grad_norm": 3.353334426879883, "learning_rate": 4.993757765384085e-06, "loss": 1.0067, "step": 6686 }, { "epoch": 0.5154949121184089, "grad_norm": 3.3230345249176025, "learning_rate": 4.992509319317078e-06, "loss": 0.8983, "step": 6687 }, { "epoch": 0.5155720012334258, "grad_norm": 3.8244221210479736, "learning_rate": 4.991260873717077e-06, "loss": 1.0921, "step": 6688 }, { "epoch": 0.5156490903484428, "grad_norm": 3.557263135910034, "learning_rate": 4.990012428661914e-06, "loss": 1.0434, "step": 6689 }, { "epoch": 0.5157261794634598, "grad_norm": 3.5607333183288574, "learning_rate": 4.988763984229425e-06, "loss": 1.0169, "step": 6690 }, { "epoch": 0.5158032685784767, "grad_norm": 3.5967090129852295, "learning_rate": 4.987515540497444e-06, "loss": 1.0117, "step": 6691 }, { "epoch": 0.5158803576934937, "grad_norm": 3.827690839767456, "learning_rate": 4.986267097543803e-06, "loss": 0.9017, "step": 6692 }, { "epoch": 0.5159574468085106, "grad_norm": 3.67106294631958, "learning_rate": 4.985018655446337e-06, "loss": 0.9155, "step": 6693 }, { "epoch": 0.5160345359235275, "grad_norm": 3.8216536045074463, "learning_rate": 4.983770214282883e-06, "loss": 0.9669, "step": 6694 }, { "epoch": 0.5161116250385446, "grad_norm": 4.562589645385742, "learning_rate": 4.9825217741312695e-06, "loss": 0.9065, "step": 6695 }, { "epoch": 0.5161887141535615, 
"grad_norm": 3.8225772380828857, "learning_rate": 4.9812733350693335e-06, "loss": 1.104, "step": 6696 }, { "epoch": 0.5162658032685785, "grad_norm": 3.3218376636505127, "learning_rate": 4.980024897174909e-06, "loss": 0.8526, "step": 6697 }, { "epoch": 0.5163428923835954, "grad_norm": 3.461397886276245, "learning_rate": 4.978776460525827e-06, "loss": 1.0473, "step": 6698 }, { "epoch": 0.5164199814986123, "grad_norm": 4.374227523803711, "learning_rate": 4.977528025199925e-06, "loss": 1.0439, "step": 6699 }, { "epoch": 0.5164970706136294, "grad_norm": 3.6719889640808105, "learning_rate": 4.976279591275033e-06, "loss": 0.9636, "step": 6700 }, { "epoch": 0.5165741597286463, "grad_norm": 3.455756425857544, "learning_rate": 4.975031158828985e-06, "loss": 0.9633, "step": 6701 }, { "epoch": 0.5166512488436633, "grad_norm": 3.604682445526123, "learning_rate": 4.9737827279396165e-06, "loss": 0.8877, "step": 6702 }, { "epoch": 0.5167283379586802, "grad_norm": 4.052313327789307, "learning_rate": 4.9725342986847575e-06, "loss": 0.9772, "step": 6703 }, { "epoch": 0.5168054270736971, "grad_norm": 3.812180757522583, "learning_rate": 4.971285871142242e-06, "loss": 0.9781, "step": 6704 }, { "epoch": 0.5168825161887142, "grad_norm": 3.562392234802246, "learning_rate": 4.9700374453899046e-06, "loss": 0.918, "step": 6705 }, { "epoch": 0.5169596053037311, "grad_norm": 3.3591978549957275, "learning_rate": 4.968789021505578e-06, "loss": 0.9033, "step": 6706 }, { "epoch": 0.5170366944187481, "grad_norm": 3.2979397773742676, "learning_rate": 4.967540599567094e-06, "loss": 0.9739, "step": 6707 }, { "epoch": 0.517113783533765, "grad_norm": 3.202559232711792, "learning_rate": 4.9662921796522856e-06, "loss": 0.7981, "step": 6708 }, { "epoch": 0.5171908726487819, "grad_norm": 3.387112617492676, "learning_rate": 4.965043761838986e-06, "loss": 0.911, "step": 6709 }, { "epoch": 0.517267961763799, "grad_norm": 3.931759834289551, "learning_rate": 4.963795346205026e-06, "loss": 1.0281, "step": 6710 }, { "epoch": 0.5173450508788159, "grad_norm": 3.8739635944366455, "learning_rate": 4.962546932828238e-06, "loss": 1.0021, "step": 6711 }, { "epoch": 0.5174221399938329, "grad_norm": 3.6043684482574463, "learning_rate": 4.961298521786456e-06, "loss": 0.9596, "step": 6712 }, { "epoch": 0.5174992291088498, "grad_norm": 3.7522406578063965, "learning_rate": 4.96005011315751e-06, "loss": 0.9987, "step": 6713 }, { "epoch": 0.5175763182238667, "grad_norm": 3.633314609527588, "learning_rate": 4.958801707019234e-06, "loss": 0.9147, "step": 6714 }, { "epoch": 0.5176534073388838, "grad_norm": 3.7346205711364746, "learning_rate": 4.957553303449456e-06, "loss": 0.9952, "step": 6715 }, { "epoch": 0.5177304964539007, "grad_norm": 3.443331003189087, "learning_rate": 4.9563049025260105e-06, "loss": 0.8854, "step": 6716 }, { "epoch": 0.5178075855689177, "grad_norm": 3.816298723220825, "learning_rate": 4.9550565043267285e-06, "loss": 1.0634, "step": 6717 }, { "epoch": 0.5178846746839346, "grad_norm": 3.5989532470703125, "learning_rate": 4.95380810892944e-06, "loss": 0.9299, "step": 6718 }, { "epoch": 0.5179617637989515, "grad_norm": 3.5715746879577637, "learning_rate": 4.952559716411976e-06, "loss": 0.9129, "step": 6719 }, { "epoch": 0.5180388529139686, "grad_norm": 3.993812322616577, "learning_rate": 4.95131132685217e-06, "loss": 0.9028, "step": 6720 }, { "epoch": 0.5181159420289855, "grad_norm": 3.4312379360198975, "learning_rate": 4.950062940327849e-06, "loss": 1.039, "step": 6721 }, { "epoch": 0.5181930311440025, "grad_norm": 3.6945204734802246, 
"learning_rate": 4.948814556916845e-06, "loss": 0.9212, "step": 6722 }, { "epoch": 0.5182701202590194, "grad_norm": 3.5042591094970703, "learning_rate": 4.94756617669699e-06, "loss": 0.9631, "step": 6723 }, { "epoch": 0.5183472093740363, "grad_norm": 3.6338624954223633, "learning_rate": 4.946317799746111e-06, "loss": 1.0292, "step": 6724 }, { "epoch": 0.5184242984890534, "grad_norm": 3.668816566467285, "learning_rate": 4.94506942614204e-06, "loss": 0.9855, "step": 6725 }, { "epoch": 0.5185013876040703, "grad_norm": 4.495701313018799, "learning_rate": 4.9438210559626045e-06, "loss": 0.9881, "step": 6726 }, { "epoch": 0.5185784767190873, "grad_norm": 3.3641388416290283, "learning_rate": 4.942572689285636e-06, "loss": 0.9117, "step": 6727 }, { "epoch": 0.5186555658341042, "grad_norm": 4.106291770935059, "learning_rate": 4.941324326188963e-06, "loss": 0.9193, "step": 6728 }, { "epoch": 0.5187326549491211, "grad_norm": 3.9092233180999756, "learning_rate": 4.940075966750414e-06, "loss": 1.0645, "step": 6729 }, { "epoch": 0.5188097440641382, "grad_norm": 4.035946846008301, "learning_rate": 4.938827611047818e-06, "loss": 1.0177, "step": 6730 }, { "epoch": 0.5188868331791551, "grad_norm": 3.45704984664917, "learning_rate": 4.937579259159004e-06, "loss": 0.9508, "step": 6731 }, { "epoch": 0.5189639222941721, "grad_norm": 3.8246238231658936, "learning_rate": 4.9363309111618e-06, "loss": 1.014, "step": 6732 }, { "epoch": 0.519041011409189, "grad_norm": 3.2717719078063965, "learning_rate": 4.935082567134033e-06, "loss": 0.8324, "step": 6733 }, { "epoch": 0.5191181005242059, "grad_norm": 3.4391167163848877, "learning_rate": 4.933834227153533e-06, "loss": 0.8627, "step": 6734 }, { "epoch": 0.519195189639223, "grad_norm": 3.452841281890869, "learning_rate": 4.9325858912981265e-06, "loss": 1.0195, "step": 6735 }, { "epoch": 0.5192722787542399, "grad_norm": 3.5897481441497803, "learning_rate": 4.93133755964564e-06, "loss": 0.8639, "step": 6736 }, { "epoch": 0.5193493678692569, "grad_norm": 3.7452785968780518, "learning_rate": 4.9300892322739e-06, "loss": 0.9422, "step": 6737 }, { "epoch": 0.5194264569842738, "grad_norm": 3.614225149154663, "learning_rate": 4.928840909260735e-06, "loss": 0.9332, "step": 6738 }, { "epoch": 0.5195035460992907, "grad_norm": 3.500887870788574, "learning_rate": 4.927592590683972e-06, "loss": 0.9325, "step": 6739 }, { "epoch": 0.5195806352143078, "grad_norm": 3.3096768856048584, "learning_rate": 4.926344276621434e-06, "loss": 0.8339, "step": 6740 }, { "epoch": 0.5196577243293247, "grad_norm": 3.481545925140381, "learning_rate": 4.925095967150949e-06, "loss": 1.0082, "step": 6741 }, { "epoch": 0.5197348134443417, "grad_norm": 3.6488254070281982, "learning_rate": 4.923847662350344e-06, "loss": 1.0195, "step": 6742 }, { "epoch": 0.5198119025593586, "grad_norm": 3.6356041431427, "learning_rate": 4.922599362297442e-06, "loss": 0.9489, "step": 6743 }, { "epoch": 0.5198889916743756, "grad_norm": 3.7208831310272217, "learning_rate": 4.921351067070068e-06, "loss": 0.9843, "step": 6744 }, { "epoch": 0.5199660807893925, "grad_norm": 3.6727988719940186, "learning_rate": 4.920102776746049e-06, "loss": 0.9307, "step": 6745 }, { "epoch": 0.5200431699044095, "grad_norm": 3.49745774269104, "learning_rate": 4.918854491403208e-06, "loss": 0.8772, "step": 6746 }, { "epoch": 0.5201202590194265, "grad_norm": 3.6098008155822754, "learning_rate": 4.91760621111937e-06, "loss": 0.9431, "step": 6747 }, { "epoch": 0.5201973481344434, "grad_norm": 3.857675552368164, "learning_rate": 4.916357935972354e-06, 
"loss": 0.9809, "step": 6748 }, { "epoch": 0.5202744372494604, "grad_norm": 3.8687050342559814, "learning_rate": 4.915109666039991e-06, "loss": 1.0598, "step": 6749 }, { "epoch": 0.5203515263644773, "grad_norm": 3.66666841506958, "learning_rate": 4.9138614014001e-06, "loss": 0.844, "step": 6750 }, { "epoch": 0.5204286154794943, "grad_norm": 3.5708515644073486, "learning_rate": 4.912613142130504e-06, "loss": 1.031, "step": 6751 }, { "epoch": 0.5205057045945113, "grad_norm": 3.5527634620666504, "learning_rate": 4.911364888309027e-06, "loss": 0.9202, "step": 6752 }, { "epoch": 0.5205827937095282, "grad_norm": 3.893904447555542, "learning_rate": 4.910116640013489e-06, "loss": 0.8644, "step": 6753 }, { "epoch": 0.5206598828245452, "grad_norm": 3.3729803562164307, "learning_rate": 4.908868397321714e-06, "loss": 0.848, "step": 6754 }, { "epoch": 0.5207369719395621, "grad_norm": 3.5364203453063965, "learning_rate": 4.907620160311522e-06, "loss": 0.9323, "step": 6755 }, { "epoch": 0.5208140610545791, "grad_norm": 3.7312333583831787, "learning_rate": 4.906371929060734e-06, "loss": 0.9715, "step": 6756 }, { "epoch": 0.5208911501695961, "grad_norm": 3.8178627490997314, "learning_rate": 4.905123703647173e-06, "loss": 0.8637, "step": 6757 }, { "epoch": 0.520968239284613, "grad_norm": 4.14602518081665, "learning_rate": 4.903875484148656e-06, "loss": 1.0387, "step": 6758 }, { "epoch": 0.52104532839963, "grad_norm": 4.017736911773682, "learning_rate": 4.902627270643006e-06, "loss": 1.0268, "step": 6759 }, { "epoch": 0.5211224175146469, "grad_norm": 3.643479347229004, "learning_rate": 4.901379063208041e-06, "loss": 1.0273, "step": 6760 }, { "epoch": 0.5211995066296639, "grad_norm": 3.823141574859619, "learning_rate": 4.90013086192158e-06, "loss": 0.9976, "step": 6761 }, { "epoch": 0.5212765957446809, "grad_norm": 3.5352299213409424, "learning_rate": 4.898882666861444e-06, "loss": 1.007, "step": 6762 }, { "epoch": 0.5213536848596978, "grad_norm": 3.585045576095581, "learning_rate": 4.897634478105448e-06, "loss": 0.9049, "step": 6763 }, { "epoch": 0.5214307739747148, "grad_norm": 3.5495879650115967, "learning_rate": 4.896386295731413e-06, "loss": 0.9165, "step": 6764 }, { "epoch": 0.5215078630897317, "grad_norm": 3.4797205924987793, "learning_rate": 4.895138119817156e-06, "loss": 0.991, "step": 6765 }, { "epoch": 0.5215849522047487, "grad_norm": 3.806225538253784, "learning_rate": 4.8938899504404944e-06, "loss": 0.867, "step": 6766 }, { "epoch": 0.5216620413197657, "grad_norm": 3.340394973754883, "learning_rate": 4.892641787679244e-06, "loss": 0.8744, "step": 6767 }, { "epoch": 0.5217391304347826, "grad_norm": 3.620096206665039, "learning_rate": 4.891393631611223e-06, "loss": 1.0585, "step": 6768 }, { "epoch": 0.5218162195497996, "grad_norm": 3.8515403270721436, "learning_rate": 4.8901454823142454e-06, "loss": 1.0662, "step": 6769 }, { "epoch": 0.5218933086648165, "grad_norm": 4.0987420082092285, "learning_rate": 4.888897339866129e-06, "loss": 1.0095, "step": 6770 }, { "epoch": 0.5219703977798335, "grad_norm": 3.7818572521209717, "learning_rate": 4.887649204344688e-06, "loss": 0.9227, "step": 6771 }, { "epoch": 0.5220474868948505, "grad_norm": 3.931004285812378, "learning_rate": 4.886401075827737e-06, "loss": 0.9917, "step": 6772 }, { "epoch": 0.5221245760098674, "grad_norm": 3.2993662357330322, "learning_rate": 4.8851529543930915e-06, "loss": 0.7365, "step": 6773 }, { "epoch": 0.5222016651248844, "grad_norm": 3.5455148220062256, "learning_rate": 4.883904840118563e-06, "loss": 0.9588, "step": 6774 }, { 
"epoch": 0.5222787542399013, "grad_norm": 3.565535306930542, "learning_rate": 4.882656733081967e-06, "loss": 0.9395, "step": 6775 }, { "epoch": 0.5223558433549182, "grad_norm": 4.117207050323486, "learning_rate": 4.881408633361116e-06, "loss": 1.1194, "step": 6776 }, { "epoch": 0.5224329324699353, "grad_norm": 3.6578311920166016, "learning_rate": 4.880160541033822e-06, "loss": 0.9629, "step": 6777 }, { "epoch": 0.5225100215849522, "grad_norm": 4.134820938110352, "learning_rate": 4.878912456177898e-06, "loss": 0.9406, "step": 6778 }, { "epoch": 0.5225871106999692, "grad_norm": 3.462031841278076, "learning_rate": 4.8776643788711565e-06, "loss": 0.8621, "step": 6779 }, { "epoch": 0.5226641998149861, "grad_norm": 3.252340793609619, "learning_rate": 4.876416309191406e-06, "loss": 0.9939, "step": 6780 }, { "epoch": 0.522741288930003, "grad_norm": 3.3890349864959717, "learning_rate": 4.8751682472164585e-06, "loss": 0.9544, "step": 6781 }, { "epoch": 0.5228183780450201, "grad_norm": 3.870720624923706, "learning_rate": 4.873920193024125e-06, "loss": 0.9931, "step": 6782 }, { "epoch": 0.522895467160037, "grad_norm": 3.4139373302459717, "learning_rate": 4.872672146692214e-06, "loss": 0.8944, "step": 6783 }, { "epoch": 0.522972556275054, "grad_norm": 4.145259380340576, "learning_rate": 4.871424108298536e-06, "loss": 1.0266, "step": 6784 }, { "epoch": 0.5230496453900709, "grad_norm": 3.556744337081909, "learning_rate": 4.8701760779208975e-06, "loss": 0.8754, "step": 6785 }, { "epoch": 0.5231267345050878, "grad_norm": 3.8212878704071045, "learning_rate": 4.8689280556371084e-06, "loss": 1.0068, "step": 6786 }, { "epoch": 0.5232038236201049, "grad_norm": 4.2917399406433105, "learning_rate": 4.867680041524977e-06, "loss": 1.0567, "step": 6787 }, { "epoch": 0.5232809127351218, "grad_norm": 3.264390230178833, "learning_rate": 4.866432035662309e-06, "loss": 0.9187, "step": 6788 }, { "epoch": 0.5233580018501388, "grad_norm": 3.5728774070739746, "learning_rate": 4.865184038126912e-06, "loss": 0.9863, "step": 6789 }, { "epoch": 0.5234350909651557, "grad_norm": 4.23891544342041, "learning_rate": 4.863936048996593e-06, "loss": 0.8834, "step": 6790 }, { "epoch": 0.5235121800801726, "grad_norm": 3.400658130645752, "learning_rate": 4.862688068349156e-06, "loss": 0.8721, "step": 6791 }, { "epoch": 0.5235892691951897, "grad_norm": 3.758239507675171, "learning_rate": 4.861440096262404e-06, "loss": 0.8671, "step": 6792 }, { "epoch": 0.5236663583102066, "grad_norm": 3.4603350162506104, "learning_rate": 4.860192132814148e-06, "loss": 0.9106, "step": 6793 }, { "epoch": 0.5237434474252236, "grad_norm": 3.423933506011963, "learning_rate": 4.858944178082188e-06, "loss": 0.9117, "step": 6794 }, { "epoch": 0.5238205365402405, "grad_norm": 3.4197144508361816, "learning_rate": 4.857696232144327e-06, "loss": 0.9417, "step": 6795 }, { "epoch": 0.5238976256552574, "grad_norm": 3.6277825832366943, "learning_rate": 4.8564482950783685e-06, "loss": 0.9014, "step": 6796 }, { "epoch": 0.5239747147702745, "grad_norm": 3.313845157623291, "learning_rate": 4.855200366962116e-06, "loss": 1.0002, "step": 6797 }, { "epoch": 0.5240518038852914, "grad_norm": 3.5861170291900635, "learning_rate": 4.853952447873371e-06, "loss": 0.9123, "step": 6798 }, { "epoch": 0.5241288930003084, "grad_norm": 3.7218551635742188, "learning_rate": 4.852704537889934e-06, "loss": 0.988, "step": 6799 }, { "epoch": 0.5242059821153253, "grad_norm": 3.6301844120025635, "learning_rate": 4.851456637089607e-06, "loss": 0.9492, "step": 6800 }, { "epoch": 0.5242830712303422, 
"grad_norm": 3.789766788482666, "learning_rate": 4.850208745550189e-06, "loss": 0.8719, "step": 6801 }, { "epoch": 0.5243601603453593, "grad_norm": 3.7619876861572266, "learning_rate": 4.84896086334948e-06, "loss": 1.0564, "step": 6802 }, { "epoch": 0.5244372494603762, "grad_norm": 3.331724166870117, "learning_rate": 4.8477129905652785e-06, "loss": 0.9431, "step": 6803 }, { "epoch": 0.5245143385753932, "grad_norm": 3.551481246948242, "learning_rate": 4.8464651272753835e-06, "loss": 0.9178, "step": 6804 }, { "epoch": 0.5245914276904101, "grad_norm": 3.397059440612793, "learning_rate": 4.845217273557593e-06, "loss": 1.0281, "step": 6805 }, { "epoch": 0.524668516805427, "grad_norm": 3.586930990219116, "learning_rate": 4.843969429489703e-06, "loss": 1.0194, "step": 6806 }, { "epoch": 0.5247456059204441, "grad_norm": 3.493337392807007, "learning_rate": 4.842721595149511e-06, "loss": 0.9052, "step": 6807 }, { "epoch": 0.524822695035461, "grad_norm": 3.607456684112549, "learning_rate": 4.841473770614814e-06, "loss": 0.8831, "step": 6808 }, { "epoch": 0.524899784150478, "grad_norm": 4.256878852844238, "learning_rate": 4.840225955963406e-06, "loss": 0.9496, "step": 6809 }, { "epoch": 0.5249768732654949, "grad_norm": 3.489366292953491, "learning_rate": 4.838978151273082e-06, "loss": 0.8825, "step": 6810 }, { "epoch": 0.5250539623805118, "grad_norm": 3.237924814224243, "learning_rate": 4.8377303566216365e-06, "loss": 0.8377, "step": 6811 }, { "epoch": 0.5251310514955289, "grad_norm": 3.6981184482574463, "learning_rate": 4.836482572086862e-06, "loss": 0.975, "step": 6812 }, { "epoch": 0.5252081406105458, "grad_norm": 3.9798178672790527, "learning_rate": 4.8352347977465535e-06, "loss": 1.0573, "step": 6813 }, { "epoch": 0.5252852297255628, "grad_norm": 3.7807118892669678, "learning_rate": 4.833987033678501e-06, "loss": 1.055, "step": 6814 }, { "epoch": 0.5253623188405797, "grad_norm": 3.7995917797088623, "learning_rate": 4.8327392799604974e-06, "loss": 1.0315, "step": 6815 }, { "epoch": 0.5254394079555966, "grad_norm": 3.570406675338745, "learning_rate": 4.831491536670334e-06, "loss": 0.8707, "step": 6816 }, { "epoch": 0.5255164970706137, "grad_norm": 3.5016283988952637, "learning_rate": 4.8302438038858005e-06, "loss": 1.0007, "step": 6817 }, { "epoch": 0.5255935861856306, "grad_norm": 3.539832830429077, "learning_rate": 4.828996081684685e-06, "loss": 0.8653, "step": 6818 }, { "epoch": 0.5256706753006476, "grad_norm": 3.599923610687256, "learning_rate": 4.82774837014478e-06, "loss": 0.9648, "step": 6819 }, { "epoch": 0.5257477644156645, "grad_norm": 3.7027664184570312, "learning_rate": 4.826500669343872e-06, "loss": 1.0022, "step": 6820 }, { "epoch": 0.5258248535306814, "grad_norm": 3.6279308795928955, "learning_rate": 4.8252529793597484e-06, "loss": 0.978, "step": 6821 }, { "epoch": 0.5259019426456985, "grad_norm": 3.6738011837005615, "learning_rate": 4.824005300270196e-06, "loss": 0.9233, "step": 6822 }, { "epoch": 0.5259790317607154, "grad_norm": 3.7691490650177, "learning_rate": 4.8227576321530015e-06, "loss": 0.9322, "step": 6823 }, { "epoch": 0.5260561208757324, "grad_norm": 3.266089916229248, "learning_rate": 4.821509975085952e-06, "loss": 0.8356, "step": 6824 }, { "epoch": 0.5261332099907493, "grad_norm": 3.7823686599731445, "learning_rate": 4.820262329146829e-06, "loss": 0.9445, "step": 6825 }, { "epoch": 0.5262102991057662, "grad_norm": 3.267396926879883, "learning_rate": 4.81901469441342e-06, "loss": 0.7796, "step": 6826 }, { "epoch": 0.5262873882207832, "grad_norm": 3.59197998046875, 
"learning_rate": 4.8177670709635085e-06, "loss": 0.9793, "step": 6827 }, { "epoch": 0.5263644773358002, "grad_norm": 3.956080913543701, "learning_rate": 4.8165194588748745e-06, "loss": 0.9847, "step": 6828 }, { "epoch": 0.5264415664508172, "grad_norm": 3.319718837738037, "learning_rate": 4.815271858225303e-06, "loss": 0.7932, "step": 6829 }, { "epoch": 0.5265186555658341, "grad_norm": 3.7214298248291016, "learning_rate": 4.814024269092575e-06, "loss": 0.9562, "step": 6830 }, { "epoch": 0.526595744680851, "grad_norm": 3.7417917251586914, "learning_rate": 4.812776691554469e-06, "loss": 1.0291, "step": 6831 }, { "epoch": 0.526672833795868, "grad_norm": 3.410249948501587, "learning_rate": 4.811529125688768e-06, "loss": 0.9492, "step": 6832 }, { "epoch": 0.526749922910885, "grad_norm": 3.934293031692505, "learning_rate": 4.8102815715732495e-06, "loss": 1.0856, "step": 6833 }, { "epoch": 0.526827012025902, "grad_norm": 3.7458932399749756, "learning_rate": 4.809034029285691e-06, "loss": 0.9264, "step": 6834 }, { "epoch": 0.5269041011409189, "grad_norm": 3.63541579246521, "learning_rate": 4.807786498903874e-06, "loss": 0.9759, "step": 6835 }, { "epoch": 0.5269811902559358, "grad_norm": 3.5854616165161133, "learning_rate": 4.806538980505572e-06, "loss": 1.0086, "step": 6836 }, { "epoch": 0.5270582793709528, "grad_norm": 3.602339267730713, "learning_rate": 4.805291474168561e-06, "loss": 0.8689, "step": 6837 }, { "epoch": 0.5271353684859698, "grad_norm": 3.346555471420288, "learning_rate": 4.80404397997062e-06, "loss": 0.867, "step": 6838 }, { "epoch": 0.5272124576009868, "grad_norm": 3.9737861156463623, "learning_rate": 4.802796497989523e-06, "loss": 0.9963, "step": 6839 }, { "epoch": 0.5272895467160037, "grad_norm": 3.378373146057129, "learning_rate": 4.801549028303042e-06, "loss": 0.8988, "step": 6840 }, { "epoch": 0.5273666358310206, "grad_norm": 3.7955613136291504, "learning_rate": 4.800301570988951e-06, "loss": 0.9651, "step": 6841 }, { "epoch": 0.5274437249460376, "grad_norm": 3.713027238845825, "learning_rate": 4.799054126125023e-06, "loss": 0.9965, "step": 6842 }, { "epoch": 0.5275208140610546, "grad_norm": 3.291476011276245, "learning_rate": 4.79780669378903e-06, "loss": 0.9719, "step": 6843 }, { "epoch": 0.5275979031760716, "grad_norm": 3.837628126144409, "learning_rate": 4.796559274058741e-06, "loss": 0.8665, "step": 6844 }, { "epoch": 0.5276749922910885, "grad_norm": 3.562666177749634, "learning_rate": 4.79531186701193e-06, "loss": 0.9667, "step": 6845 }, { "epoch": 0.5277520814061054, "grad_norm": 3.401890754699707, "learning_rate": 4.7940644727263615e-06, "loss": 0.8789, "step": 6846 }, { "epoch": 0.5278291705211224, "grad_norm": 3.439560651779175, "learning_rate": 4.7928170912798075e-06, "loss": 0.8647, "step": 6847 }, { "epoch": 0.5279062596361394, "grad_norm": 3.419785737991333, "learning_rate": 4.791569722750036e-06, "loss": 0.8197, "step": 6848 }, { "epoch": 0.5279833487511564, "grad_norm": 3.7677369117736816, "learning_rate": 4.790322367214812e-06, "loss": 0.9698, "step": 6849 }, { "epoch": 0.5280604378661733, "grad_norm": 3.788708448410034, "learning_rate": 4.789075024751903e-06, "loss": 0.9806, "step": 6850 }, { "epoch": 0.5281375269811902, "grad_norm": 3.4838881492614746, "learning_rate": 4.7878276954390726e-06, "loss": 0.9545, "step": 6851 }, { "epoch": 0.5282146160962072, "grad_norm": 3.561063528060913, "learning_rate": 4.786580379354087e-06, "loss": 0.9546, "step": 6852 }, { "epoch": 0.5282917052112241, "grad_norm": 3.6301426887512207, "learning_rate": 4.78533307657471e-06, 
"loss": 0.9259, "step": 6853 }, { "epoch": 0.5283687943262412, "grad_norm": 3.6689276695251465, "learning_rate": 4.784085787178703e-06, "loss": 0.9707, "step": 6854 }, { "epoch": 0.5284458834412581, "grad_norm": 3.4558374881744385, "learning_rate": 4.782838511243828e-06, "loss": 0.8788, "step": 6855 }, { "epoch": 0.528522972556275, "grad_norm": 3.636366367340088, "learning_rate": 4.78159124884785e-06, "loss": 0.9705, "step": 6856 }, { "epoch": 0.528600061671292, "grad_norm": 3.362304925918579, "learning_rate": 4.780344000068524e-06, "loss": 0.898, "step": 6857 }, { "epoch": 0.528677150786309, "grad_norm": 3.9811975955963135, "learning_rate": 4.779096764983614e-06, "loss": 0.9952, "step": 6858 }, { "epoch": 0.528754239901326, "grad_norm": 3.3469631671905518, "learning_rate": 4.777849543670875e-06, "loss": 0.91, "step": 6859 }, { "epoch": 0.5288313290163429, "grad_norm": 3.896716594696045, "learning_rate": 4.776602336208066e-06, "loss": 1.0635, "step": 6860 }, { "epoch": 0.5289084181313598, "grad_norm": 3.27893328666687, "learning_rate": 4.775355142672946e-06, "loss": 0.9398, "step": 6861 }, { "epoch": 0.5289855072463768, "grad_norm": 3.709341049194336, "learning_rate": 4.774107963143269e-06, "loss": 0.8442, "step": 6862 }, { "epoch": 0.5290625963613937, "grad_norm": 4.007583141326904, "learning_rate": 4.772860797696789e-06, "loss": 0.9884, "step": 6863 }, { "epoch": 0.5291396854764108, "grad_norm": 3.773080587387085, "learning_rate": 4.771613646411264e-06, "loss": 1.0555, "step": 6864 }, { "epoch": 0.5292167745914277, "grad_norm": 3.6124513149261475, "learning_rate": 4.770366509364444e-06, "loss": 1.0263, "step": 6865 }, { "epoch": 0.5292938637064446, "grad_norm": 3.6658780574798584, "learning_rate": 4.7691193866340825e-06, "loss": 0.9506, "step": 6866 }, { "epoch": 0.5293709528214616, "grad_norm": 3.8184187412261963, "learning_rate": 4.767872278297933e-06, "loss": 0.9266, "step": 6867 }, { "epoch": 0.5294480419364785, "grad_norm": 3.761213779449463, "learning_rate": 4.766625184433744e-06, "loss": 0.9132, "step": 6868 }, { "epoch": 0.5295251310514956, "grad_norm": 3.682281970977783, "learning_rate": 4.765378105119266e-06, "loss": 0.9613, "step": 6869 }, { "epoch": 0.5296022201665125, "grad_norm": 4.122997760772705, "learning_rate": 4.7641310404322475e-06, "loss": 1.1595, "step": 6870 }, { "epoch": 0.5296793092815294, "grad_norm": 3.7581260204315186, "learning_rate": 4.762883990450437e-06, "loss": 0.8558, "step": 6871 }, { "epoch": 0.5297563983965464, "grad_norm": 3.5212559700012207, "learning_rate": 4.761636955251584e-06, "loss": 0.9798, "step": 6872 }, { "epoch": 0.5298334875115633, "grad_norm": 3.686304807662964, "learning_rate": 4.760389934913429e-06, "loss": 0.9631, "step": 6873 }, { "epoch": 0.5299105766265804, "grad_norm": 3.6530697345733643, "learning_rate": 4.759142929513722e-06, "loss": 0.9296, "step": 6874 }, { "epoch": 0.5299876657415973, "grad_norm": 3.5719711780548096, "learning_rate": 4.757895939130206e-06, "loss": 1.0083, "step": 6875 }, { "epoch": 0.5300647548566142, "grad_norm": 4.652621746063232, "learning_rate": 4.7566489638406245e-06, "loss": 0.8969, "step": 6876 }, { "epoch": 0.5301418439716312, "grad_norm": 3.799062490463257, "learning_rate": 4.755402003722719e-06, "loss": 1.0924, "step": 6877 }, { "epoch": 0.5302189330866481, "grad_norm": 3.6467134952545166, "learning_rate": 4.754155058854233e-06, "loss": 1.0393, "step": 6878 }, { "epoch": 0.5302960222016652, "grad_norm": 3.597630500793457, "learning_rate": 4.752908129312905e-06, "loss": 1.0003, "step": 6879 }, { 
"epoch": 0.5303731113166821, "grad_norm": 3.513861894607544, "learning_rate": 4.751661215176476e-06, "loss": 0.9198, "step": 6880 }, { "epoch": 0.530450200431699, "grad_norm": 3.668997049331665, "learning_rate": 4.750414316522681e-06, "loss": 0.9596, "step": 6881 }, { "epoch": 0.530527289546716, "grad_norm": 3.9775052070617676, "learning_rate": 4.7491674334292646e-06, "loss": 1.0214, "step": 6882 }, { "epoch": 0.5306043786617329, "grad_norm": 3.3533380031585693, "learning_rate": 4.747920565973957e-06, "loss": 0.9444, "step": 6883 }, { "epoch": 0.53068146777675, "grad_norm": 3.5897982120513916, "learning_rate": 4.746673714234498e-06, "loss": 0.9681, "step": 6884 }, { "epoch": 0.5307585568917669, "grad_norm": 3.429609537124634, "learning_rate": 4.745426878288621e-06, "loss": 0.9459, "step": 6885 }, { "epoch": 0.5308356460067838, "grad_norm": 3.4824554920196533, "learning_rate": 4.744180058214059e-06, "loss": 1.0649, "step": 6886 }, { "epoch": 0.5309127351218008, "grad_norm": 3.27522611618042, "learning_rate": 4.742933254088547e-06, "loss": 0.8855, "step": 6887 }, { "epoch": 0.5309898242368177, "grad_norm": 3.366482973098755, "learning_rate": 4.741686465989814e-06, "loss": 0.8464, "step": 6888 }, { "epoch": 0.5310669133518348, "grad_norm": 3.3331689834594727, "learning_rate": 4.740439693995592e-06, "loss": 0.8612, "step": 6889 }, { "epoch": 0.5311440024668517, "grad_norm": 3.410470485687256, "learning_rate": 4.739192938183611e-06, "loss": 1.1055, "step": 6890 }, { "epoch": 0.5312210915818686, "grad_norm": 3.858147144317627, "learning_rate": 4.7379461986315995e-06, "loss": 0.9747, "step": 6891 }, { "epoch": 0.5312981806968856, "grad_norm": 3.707534074783325, "learning_rate": 4.736699475417285e-06, "loss": 0.8828, "step": 6892 }, { "epoch": 0.5313752698119025, "grad_norm": 3.58382248878479, "learning_rate": 4.735452768618395e-06, "loss": 0.9151, "step": 6893 }, { "epoch": 0.5314523589269196, "grad_norm": 3.5018043518066406, "learning_rate": 4.734206078312654e-06, "loss": 1.0009, "step": 6894 }, { "epoch": 0.5315294480419365, "grad_norm": 4.095643520355225, "learning_rate": 4.732959404577787e-06, "loss": 0.9141, "step": 6895 }, { "epoch": 0.5316065371569534, "grad_norm": 3.533647060394287, "learning_rate": 4.731712747491521e-06, "loss": 0.9161, "step": 6896 }, { "epoch": 0.5316836262719704, "grad_norm": 3.72644305229187, "learning_rate": 4.730466107131574e-06, "loss": 0.9536, "step": 6897 }, { "epoch": 0.5317607153869873, "grad_norm": 3.7895758152008057, "learning_rate": 4.729219483575669e-06, "loss": 0.9062, "step": 6898 }, { "epoch": 0.5318378045020044, "grad_norm": 3.6426897048950195, "learning_rate": 4.7279728769015266e-06, "loss": 0.9667, "step": 6899 }, { "epoch": 0.5319148936170213, "grad_norm": 3.507504463195801, "learning_rate": 4.726726287186866e-06, "loss": 0.8926, "step": 6900 }, { "epoch": 0.5319919827320382, "grad_norm": 3.811716318130493, "learning_rate": 4.725479714509408e-06, "loss": 0.9849, "step": 6901 }, { "epoch": 0.5320690718470552, "grad_norm": 3.6967577934265137, "learning_rate": 4.724233158946866e-06, "loss": 1.0016, "step": 6902 }, { "epoch": 0.5321461609620721, "grad_norm": 3.3442699909210205, "learning_rate": 4.722986620576959e-06, "loss": 0.8726, "step": 6903 }, { "epoch": 0.5322232500770891, "grad_norm": 3.455738067626953, "learning_rate": 4.721740099477402e-06, "loss": 0.9078, "step": 6904 }, { "epoch": 0.5323003391921061, "grad_norm": 3.491821765899658, "learning_rate": 4.720493595725908e-06, "loss": 0.9688, "step": 6905 }, { "epoch": 0.532377428307123, 
"grad_norm": 3.792142868041992, "learning_rate": 4.719247109400192e-06, "loss": 0.9775, "step": 6906 }, { "epoch": 0.53245451742214, "grad_norm": 3.832749605178833, "learning_rate": 4.718000640577963e-06, "loss": 1.0054, "step": 6907 }, { "epoch": 0.5325316065371569, "grad_norm": 3.693619728088379, "learning_rate": 4.716754189336934e-06, "loss": 0.9271, "step": 6908 }, { "epoch": 0.532608695652174, "grad_norm": 3.5125207901000977, "learning_rate": 4.715507755754815e-06, "loss": 0.8987, "step": 6909 }, { "epoch": 0.5326857847671909, "grad_norm": 3.387834310531616, "learning_rate": 4.714261339909313e-06, "loss": 0.9932, "step": 6910 }, { "epoch": 0.5327628738822078, "grad_norm": 3.5012171268463135, "learning_rate": 4.713014941878137e-06, "loss": 0.8553, "step": 6911 }, { "epoch": 0.5328399629972248, "grad_norm": 3.9803214073181152, "learning_rate": 4.711768561738993e-06, "loss": 1.0452, "step": 6912 }, { "epoch": 0.5329170521122417, "grad_norm": 3.555570363998413, "learning_rate": 4.710522199569586e-06, "loss": 1.0523, "step": 6913 }, { "epoch": 0.5329941412272587, "grad_norm": 3.6644721031188965, "learning_rate": 4.7092758554476215e-06, "loss": 0.9008, "step": 6914 }, { "epoch": 0.5330712303422757, "grad_norm": 3.4861278533935547, "learning_rate": 4.708029529450802e-06, "loss": 0.9557, "step": 6915 }, { "epoch": 0.5331483194572927, "grad_norm": 3.4730987548828125, "learning_rate": 4.706783221656828e-06, "loss": 0.9147, "step": 6916 }, { "epoch": 0.5332254085723096, "grad_norm": 3.5856359004974365, "learning_rate": 4.705536932143403e-06, "loss": 0.8769, "step": 6917 }, { "epoch": 0.5333024976873265, "grad_norm": 3.6524572372436523, "learning_rate": 4.704290660988224e-06, "loss": 0.9084, "step": 6918 }, { "epoch": 0.5333795868023435, "grad_norm": 3.7509005069732666, "learning_rate": 4.70304440826899e-06, "loss": 0.9222, "step": 6919 }, { "epoch": 0.5334566759173605, "grad_norm": 3.5913355350494385, "learning_rate": 4.701798174063402e-06, "loss": 0.9637, "step": 6920 }, { "epoch": 0.5335337650323775, "grad_norm": 3.756228446960449, "learning_rate": 4.700551958449151e-06, "loss": 0.9755, "step": 6921 }, { "epoch": 0.5336108541473944, "grad_norm": 3.908708333969116, "learning_rate": 4.699305761503935e-06, "loss": 1.0154, "step": 6922 }, { "epoch": 0.5336879432624113, "grad_norm": 3.226095676422119, "learning_rate": 4.698059583305447e-06, "loss": 0.855, "step": 6923 }, { "epoch": 0.5337650323774283, "grad_norm": 3.847050428390503, "learning_rate": 4.696813423931381e-06, "loss": 0.9876, "step": 6924 }, { "epoch": 0.5338421214924453, "grad_norm": 3.6332948207855225, "learning_rate": 4.695567283459426e-06, "loss": 0.9547, "step": 6925 }, { "epoch": 0.5339192106074623, "grad_norm": 3.968454122543335, "learning_rate": 4.694321161967275e-06, "loss": 0.9856, "step": 6926 }, { "epoch": 0.5339962997224792, "grad_norm": 3.6489675045013428, "learning_rate": 4.693075059532619e-06, "loss": 0.9324, "step": 6927 }, { "epoch": 0.5340733888374961, "grad_norm": 3.761967182159424, "learning_rate": 4.6918289762331405e-06, "loss": 0.9429, "step": 6928 }, { "epoch": 0.5341504779525131, "grad_norm": 3.6253440380096436, "learning_rate": 4.69058291214653e-06, "loss": 0.9184, "step": 6929 }, { "epoch": 0.53422756706753, "grad_norm": 3.8986105918884277, "learning_rate": 4.689336867350474e-06, "loss": 0.9953, "step": 6930 }, { "epoch": 0.5343046561825471, "grad_norm": 3.430659055709839, "learning_rate": 4.688090841922654e-06, "loss": 0.9899, "step": 6931 }, { "epoch": 0.534381745297564, "grad_norm": 3.6624832153320312, 
"learning_rate": 4.686844835940754e-06, "loss": 1.0142, "step": 6932 }, { "epoch": 0.5344588344125809, "grad_norm": 3.478834390640259, "learning_rate": 4.685598849482458e-06, "loss": 0.9255, "step": 6933 }, { "epoch": 0.5345359235275979, "grad_norm": 3.8827271461486816, "learning_rate": 4.6843528826254444e-06, "loss": 1.0197, "step": 6934 }, { "epoch": 0.5346130126426148, "grad_norm": 3.9673526287078857, "learning_rate": 4.683106935447395e-06, "loss": 0.8611, "step": 6935 }, { "epoch": 0.5346901017576319, "grad_norm": 3.5819478034973145, "learning_rate": 4.681861008025985e-06, "loss": 0.9893, "step": 6936 }, { "epoch": 0.5347671908726488, "grad_norm": 3.0837767124176025, "learning_rate": 4.680615100438894e-06, "loss": 0.9102, "step": 6937 }, { "epoch": 0.5348442799876657, "grad_norm": 3.779120922088623, "learning_rate": 4.679369212763799e-06, "loss": 0.9429, "step": 6938 }, { "epoch": 0.5349213691026827, "grad_norm": 3.4904768466949463, "learning_rate": 4.678123345078371e-06, "loss": 0.9892, "step": 6939 }, { "epoch": 0.5349984582176996, "grad_norm": 3.5319314002990723, "learning_rate": 4.676877497460286e-06, "loss": 0.9813, "step": 6940 }, { "epoch": 0.5350755473327167, "grad_norm": 4.093774795532227, "learning_rate": 4.675631669987217e-06, "loss": 1.0125, "step": 6941 }, { "epoch": 0.5351526364477336, "grad_norm": 3.4952986240386963, "learning_rate": 4.674385862736832e-06, "loss": 0.9641, "step": 6942 }, { "epoch": 0.5352297255627505, "grad_norm": 3.800241470336914, "learning_rate": 4.673140075786801e-06, "loss": 0.9452, "step": 6943 }, { "epoch": 0.5353068146777675, "grad_norm": 3.972257614135742, "learning_rate": 4.671894309214796e-06, "loss": 0.9489, "step": 6944 }, { "epoch": 0.5353839037927844, "grad_norm": 3.972130060195923, "learning_rate": 4.67064856309848e-06, "loss": 1.0165, "step": 6945 }, { "epoch": 0.5354609929078015, "grad_norm": 3.7796213626861572, "learning_rate": 4.669402837515521e-06, "loss": 0.8956, "step": 6946 }, { "epoch": 0.5355380820228184, "grad_norm": 3.68583345413208, "learning_rate": 4.668157132543582e-06, "loss": 0.8976, "step": 6947 }, { "epoch": 0.5356151711378353, "grad_norm": 3.53948974609375, "learning_rate": 4.666911448260327e-06, "loss": 0.9083, "step": 6948 }, { "epoch": 0.5356922602528523, "grad_norm": 3.7999818325042725, "learning_rate": 4.665665784743419e-06, "loss": 1.0559, "step": 6949 }, { "epoch": 0.5357693493678692, "grad_norm": 3.492060661315918, "learning_rate": 4.664420142070517e-06, "loss": 0.9844, "step": 6950 }, { "epoch": 0.5358464384828863, "grad_norm": 3.376392364501953, "learning_rate": 4.663174520319282e-06, "loss": 1.0456, "step": 6951 }, { "epoch": 0.5359235275979032, "grad_norm": 3.963566303253174, "learning_rate": 4.661928919567371e-06, "loss": 1.0061, "step": 6952 }, { "epoch": 0.5360006167129201, "grad_norm": 3.267530679702759, "learning_rate": 4.660683339892441e-06, "loss": 0.8218, "step": 6953 }, { "epoch": 0.5360777058279371, "grad_norm": 3.88205623626709, "learning_rate": 4.659437781372147e-06, "loss": 0.9725, "step": 6954 }, { "epoch": 0.536154794942954, "grad_norm": 3.550692081451416, "learning_rate": 4.658192244084143e-06, "loss": 0.9525, "step": 6955 }, { "epoch": 0.5362318840579711, "grad_norm": 3.2505733966827393, "learning_rate": 4.656946728106083e-06, "loss": 0.8847, "step": 6956 }, { "epoch": 0.536308973172988, "grad_norm": 3.462864398956299, "learning_rate": 4.6557012335156185e-06, "loss": 0.8702, "step": 6957 }, { "epoch": 0.5363860622880049, "grad_norm": 3.4686732292175293, "learning_rate": 
4.654455760390398e-06, "loss": 0.8376, "step": 6958 }, { "epoch": 0.5364631514030219, "grad_norm": 3.5081446170806885, "learning_rate": 4.653210308808071e-06, "loss": 0.8766, "step": 6959 }, { "epoch": 0.5365402405180388, "grad_norm": 3.306342124938965, "learning_rate": 4.651964878846285e-06, "loss": 0.8244, "step": 6960 }, { "epoch": 0.5366173296330559, "grad_norm": 4.375651836395264, "learning_rate": 4.6507194705826866e-06, "loss": 1.039, "step": 6961 }, { "epoch": 0.5366944187480728, "grad_norm": 3.5302603244781494, "learning_rate": 4.649474084094921e-06, "loss": 0.9445, "step": 6962 }, { "epoch": 0.5367715078630897, "grad_norm": 3.6997478008270264, "learning_rate": 4.64822871946063e-06, "loss": 0.9587, "step": 6963 }, { "epoch": 0.5368485969781067, "grad_norm": 3.9736435413360596, "learning_rate": 4.646983376757457e-06, "loss": 0.955, "step": 6964 }, { "epoch": 0.5369256860931236, "grad_norm": 3.3650596141815186, "learning_rate": 4.645738056063042e-06, "loss": 0.9348, "step": 6965 }, { "epoch": 0.5370027752081407, "grad_norm": 3.6180570125579834, "learning_rate": 4.6444927574550245e-06, "loss": 0.7941, "step": 6966 }, { "epoch": 0.5370798643231576, "grad_norm": 3.7963132858276367, "learning_rate": 4.6432474810110415e-06, "loss": 0.8803, "step": 6967 }, { "epoch": 0.5371569534381745, "grad_norm": 3.3811287879943848, "learning_rate": 4.642002226808732e-06, "loss": 0.7674, "step": 6968 }, { "epoch": 0.5372340425531915, "grad_norm": 3.9231021404266357, "learning_rate": 4.640756994925726e-06, "loss": 1.0332, "step": 6969 }, { "epoch": 0.5373111316682084, "grad_norm": 3.508418321609497, "learning_rate": 4.639511785439664e-06, "loss": 0.9099, "step": 6970 }, { "epoch": 0.5373882207832255, "grad_norm": 3.725053310394287, "learning_rate": 4.638266598428175e-06, "loss": 0.8434, "step": 6971 }, { "epoch": 0.5374653098982424, "grad_norm": 3.9625444412231445, "learning_rate": 4.63702143396889e-06, "loss": 0.8459, "step": 6972 }, { "epoch": 0.5375423990132593, "grad_norm": 3.8281447887420654, "learning_rate": 4.635776292139437e-06, "loss": 0.9814, "step": 6973 }, { "epoch": 0.5376194881282763, "grad_norm": 4.015565872192383, "learning_rate": 4.634531173017448e-06, "loss": 1.0362, "step": 6974 }, { "epoch": 0.5376965772432932, "grad_norm": 3.4385459423065186, "learning_rate": 4.633286076680546e-06, "loss": 0.8451, "step": 6975 }, { "epoch": 0.5377736663583103, "grad_norm": 3.9049465656280518, "learning_rate": 4.632041003206359e-06, "loss": 0.7773, "step": 6976 }, { "epoch": 0.5378507554733272, "grad_norm": 5.6488142013549805, "learning_rate": 4.630795952672509e-06, "loss": 0.9841, "step": 6977 }, { "epoch": 0.5379278445883441, "grad_norm": 3.4264445304870605, "learning_rate": 4.62955092515662e-06, "loss": 0.9391, "step": 6978 }, { "epoch": 0.5380049337033611, "grad_norm": 3.8330531120300293, "learning_rate": 4.628305920736311e-06, "loss": 1.0133, "step": 6979 }, { "epoch": 0.538082022818378, "grad_norm": 3.940965414047241, "learning_rate": 4.627060939489204e-06, "loss": 0.959, "step": 6980 }, { "epoch": 0.538159111933395, "grad_norm": 3.4086647033691406, "learning_rate": 4.625815981492916e-06, "loss": 0.95, "step": 6981 }, { "epoch": 0.538236201048412, "grad_norm": 3.4563660621643066, "learning_rate": 4.6245710468250626e-06, "loss": 0.9469, "step": 6982 }, { "epoch": 0.5383132901634289, "grad_norm": 3.675934076309204, "learning_rate": 4.623326135563261e-06, "loss": 0.8943, "step": 6983 }, { "epoch": 0.5383903792784459, "grad_norm": 3.754777431488037, "learning_rate": 4.622081247785123e-06, "loss": 
0.9712, "step": 6984 }, { "epoch": 0.5384674683934628, "grad_norm": 3.8146581649780273, "learning_rate": 4.620836383568262e-06, "loss": 0.9867, "step": 6985 }, { "epoch": 0.5385445575084798, "grad_norm": 3.5257890224456787, "learning_rate": 4.61959154299029e-06, "loss": 0.9331, "step": 6986 }, { "epoch": 0.5386216466234968, "grad_norm": 3.52950119972229, "learning_rate": 4.618346726128814e-06, "loss": 0.8518, "step": 6987 }, { "epoch": 0.5386987357385137, "grad_norm": 3.408184766769409, "learning_rate": 4.6171019330614424e-06, "loss": 0.9116, "step": 6988 }, { "epoch": 0.5387758248535307, "grad_norm": 4.128994941711426, "learning_rate": 4.615857163865784e-06, "loss": 0.9861, "step": 6989 }, { "epoch": 0.5388529139685476, "grad_norm": 3.886155366897583, "learning_rate": 4.614612418619441e-06, "loss": 1.0266, "step": 6990 }, { "epoch": 0.5389300030835646, "grad_norm": 3.525916814804077, "learning_rate": 4.613367697400017e-06, "loss": 0.8755, "step": 6991 }, { "epoch": 0.5390070921985816, "grad_norm": 3.6894612312316895, "learning_rate": 4.612123000285116e-06, "loss": 0.917, "step": 6992 }, { "epoch": 0.5390841813135985, "grad_norm": 3.610119342803955, "learning_rate": 4.610878327352336e-06, "loss": 0.9675, "step": 6993 }, { "epoch": 0.5391612704286155, "grad_norm": 3.5024073123931885, "learning_rate": 4.6096336786792775e-06, "loss": 0.9345, "step": 6994 }, { "epoch": 0.5392383595436324, "grad_norm": 3.419611930847168, "learning_rate": 4.608389054343537e-06, "loss": 0.8538, "step": 6995 }, { "epoch": 0.5393154486586494, "grad_norm": 3.4121594429016113, "learning_rate": 4.607144454422711e-06, "loss": 0.9487, "step": 6996 }, { "epoch": 0.5393925377736664, "grad_norm": 3.849426746368408, "learning_rate": 4.605899878994395e-06, "loss": 0.9854, "step": 6997 }, { "epoch": 0.5394696268886833, "grad_norm": 3.513972759246826, "learning_rate": 4.604655328136177e-06, "loss": 0.948, "step": 6998 }, { "epoch": 0.5395467160037003, "grad_norm": 3.6121532917022705, "learning_rate": 4.603410801925654e-06, "loss": 1.0326, "step": 6999 }, { "epoch": 0.5396238051187172, "grad_norm": 3.5565638542175293, "learning_rate": 4.602166300440415e-06, "loss": 0.8536, "step": 7000 }, { "epoch": 0.5397008942337342, "grad_norm": 3.5659432411193848, "learning_rate": 4.600921823758044e-06, "loss": 0.9094, "step": 7001 }, { "epoch": 0.5397779833487512, "grad_norm": 3.8643648624420166, "learning_rate": 4.599677371956132e-06, "loss": 0.9769, "step": 7002 }, { "epoch": 0.5398550724637681, "grad_norm": 3.5835399627685547, "learning_rate": 4.598432945112262e-06, "loss": 0.8676, "step": 7003 }, { "epoch": 0.5399321615787851, "grad_norm": 3.427264451980591, "learning_rate": 4.597188543304017e-06, "loss": 0.9429, "step": 7004 }, { "epoch": 0.540009250693802, "grad_norm": 4.065280437469482, "learning_rate": 4.595944166608982e-06, "loss": 0.8582, "step": 7005 }, { "epoch": 0.540086339808819, "grad_norm": 3.8755879402160645, "learning_rate": 4.594699815104735e-06, "loss": 0.9051, "step": 7006 }, { "epoch": 0.540163428923836, "grad_norm": 3.609768867492676, "learning_rate": 4.593455488868855e-06, "loss": 0.9795, "step": 7007 }, { "epoch": 0.5402405180388529, "grad_norm": 3.6061177253723145, "learning_rate": 4.59221118797892e-06, "loss": 0.8821, "step": 7008 }, { "epoch": 0.5403176071538699, "grad_norm": 3.737497568130493, "learning_rate": 4.590966912512505e-06, "loss": 0.8979, "step": 7009 }, { "epoch": 0.5403946962688868, "grad_norm": 3.619135618209839, "learning_rate": 4.589722662547185e-06, "loss": 0.8114, "step": 7010 }, { "epoch": 
0.5404717853839038, "grad_norm": 3.6571645736694336, "learning_rate": 4.588478438160532e-06, "loss": 0.9064, "step": 7011 }, { "epoch": 0.5405488744989208, "grad_norm": 3.8513779640197754, "learning_rate": 4.5872342394301175e-06, "loss": 0.9858, "step": 7012 }, { "epoch": 0.5406259636139377, "grad_norm": 3.6383817195892334, "learning_rate": 4.58599006643351e-06, "loss": 0.979, "step": 7013 }, { "epoch": 0.5407030527289547, "grad_norm": 3.927966356277466, "learning_rate": 4.584745919248275e-06, "loss": 1.0223, "step": 7014 }, { "epoch": 0.5407801418439716, "grad_norm": 3.733217716217041, "learning_rate": 4.5835017979519844e-06, "loss": 0.9701, "step": 7015 }, { "epoch": 0.5408572309589886, "grad_norm": 3.937772035598755, "learning_rate": 4.582257702622199e-06, "loss": 0.9882, "step": 7016 }, { "epoch": 0.5409343200740055, "grad_norm": 3.6561355590820312, "learning_rate": 4.581013633336483e-06, "loss": 0.8608, "step": 7017 }, { "epoch": 0.5410114091890225, "grad_norm": 3.8480238914489746, "learning_rate": 4.579769590172396e-06, "loss": 0.9592, "step": 7018 }, { "epoch": 0.5410884983040395, "grad_norm": 3.8090572357177734, "learning_rate": 4.578525573207499e-06, "loss": 0.9319, "step": 7019 }, { "epoch": 0.5411655874190564, "grad_norm": 3.5534706115722656, "learning_rate": 4.57728158251935e-06, "loss": 0.8965, "step": 7020 }, { "epoch": 0.5412426765340734, "grad_norm": 3.8051340579986572, "learning_rate": 4.576037618185504e-06, "loss": 1.0375, "step": 7021 }, { "epoch": 0.5413197656490903, "grad_norm": 3.5272462368011475, "learning_rate": 4.574793680283518e-06, "loss": 0.8829, "step": 7022 }, { "epoch": 0.5413968547641073, "grad_norm": 3.668534278869629, "learning_rate": 4.573549768890944e-06, "loss": 1.0591, "step": 7023 }, { "epoch": 0.5414739438791243, "grad_norm": 3.377134084701538, "learning_rate": 4.572305884085332e-06, "loss": 0.9829, "step": 7024 }, { "epoch": 0.5415510329941412, "grad_norm": 3.962786912918091, "learning_rate": 4.5710620259442325e-06, "loss": 1.0349, "step": 7025 }, { "epoch": 0.5416281221091582, "grad_norm": 3.8737564086914062, "learning_rate": 4.5698181945451966e-06, "loss": 0.995, "step": 7026 }, { "epoch": 0.5417052112241751, "grad_norm": 3.767915725708008, "learning_rate": 4.568574389965766e-06, "loss": 0.8897, "step": 7027 }, { "epoch": 0.5417823003391921, "grad_norm": 3.9697983264923096, "learning_rate": 4.567330612283488e-06, "loss": 0.8637, "step": 7028 }, { "epoch": 0.5418593894542091, "grad_norm": 3.8324038982391357, "learning_rate": 4.566086861575908e-06, "loss": 1.0064, "step": 7029 }, { "epoch": 0.541936478569226, "grad_norm": 3.6003153324127197, "learning_rate": 4.564843137920561e-06, "loss": 0.9691, "step": 7030 }, { "epoch": 0.542013567684243, "grad_norm": 3.5627217292785645, "learning_rate": 4.563599441394994e-06, "loss": 0.86, "step": 7031 }, { "epoch": 0.5420906567992599, "grad_norm": 3.6470260620117188, "learning_rate": 4.56235577207674e-06, "loss": 0.9588, "step": 7032 }, { "epoch": 0.5421677459142769, "grad_norm": 4.533912181854248, "learning_rate": 4.561112130043337e-06, "loss": 1.0559, "step": 7033 }, { "epoch": 0.5422448350292939, "grad_norm": 3.7450127601623535, "learning_rate": 4.5598685153723205e-06, "loss": 0.9593, "step": 7034 }, { "epoch": 0.5423219241443108, "grad_norm": 3.7098305225372314, "learning_rate": 4.558624928141222e-06, "loss": 1.0346, "step": 7035 }, { "epoch": 0.5423990132593278, "grad_norm": 3.9631752967834473, "learning_rate": 4.557381368427573e-06, "loss": 0.9668, "step": 7036 }, { "epoch": 0.5424761023743447, 
"grad_norm": 3.5435612201690674, "learning_rate": 4.5561378363089035e-06, "loss": 1.0405, "step": 7037 }, { "epoch": 0.5425531914893617, "grad_norm": 3.9541807174682617, "learning_rate": 4.554894331862741e-06, "loss": 0.9185, "step": 7038 }, { "epoch": 0.5426302806043787, "grad_norm": 3.6304094791412354, "learning_rate": 4.553650855166612e-06, "loss": 0.9789, "step": 7039 }, { "epoch": 0.5427073697193956, "grad_norm": 3.823951005935669, "learning_rate": 4.55240740629804e-06, "loss": 0.9947, "step": 7040 }, { "epoch": 0.5427844588344126, "grad_norm": 3.5936341285705566, "learning_rate": 4.551163985334548e-06, "loss": 0.971, "step": 7041 }, { "epoch": 0.5428615479494295, "grad_norm": 3.330362319946289, "learning_rate": 4.549920592353659e-06, "loss": 0.8836, "step": 7042 }, { "epoch": 0.5429386370644464, "grad_norm": 3.501664400100708, "learning_rate": 4.548677227432888e-06, "loss": 0.9594, "step": 7043 }, { "epoch": 0.5430157261794635, "grad_norm": 3.7609310150146484, "learning_rate": 4.547433890649754e-06, "loss": 0.83, "step": 7044 }, { "epoch": 0.5430928152944804, "grad_norm": 3.811150312423706, "learning_rate": 4.546190582081774e-06, "loss": 0.9383, "step": 7045 }, { "epoch": 0.5431699044094974, "grad_norm": 3.409965991973877, "learning_rate": 4.544947301806461e-06, "loss": 0.894, "step": 7046 }, { "epoch": 0.5432469935245143, "grad_norm": 3.6347696781158447, "learning_rate": 4.543704049901326e-06, "loss": 0.8651, "step": 7047 }, { "epoch": 0.5433240826395312, "grad_norm": 3.6981849670410156, "learning_rate": 4.542460826443881e-06, "loss": 0.964, "step": 7048 }, { "epoch": 0.5434011717545483, "grad_norm": 3.7254111766815186, "learning_rate": 4.541217631511633e-06, "loss": 0.9934, "step": 7049 }, { "epoch": 0.5434782608695652, "grad_norm": 3.680558443069458, "learning_rate": 4.5399744651820915e-06, "loss": 0.7968, "step": 7050 }, { "epoch": 0.5435553499845822, "grad_norm": 3.6431643962860107, "learning_rate": 4.538731327532758e-06, "loss": 0.9735, "step": 7051 }, { "epoch": 0.5436324390995991, "grad_norm": 3.7536261081695557, "learning_rate": 4.5374882186411375e-06, "loss": 0.8981, "step": 7052 }, { "epoch": 0.543709528214616, "grad_norm": 3.773322582244873, "learning_rate": 4.536245138584731e-06, "loss": 1.0596, "step": 7053 }, { "epoch": 0.5437866173296331, "grad_norm": 3.827707052230835, "learning_rate": 4.5350020874410385e-06, "loss": 0.9203, "step": 7054 }, { "epoch": 0.54386370644465, "grad_norm": 3.5610642433166504, "learning_rate": 4.533759065287557e-06, "loss": 0.9087, "step": 7055 }, { "epoch": 0.543940795559667, "grad_norm": 3.6839845180511475, "learning_rate": 4.5325160722017845e-06, "loss": 0.8665, "step": 7056 }, { "epoch": 0.5440178846746839, "grad_norm": 3.7331130504608154, "learning_rate": 4.531273108261213e-06, "loss": 0.8876, "step": 7057 }, { "epoch": 0.5440949737897008, "grad_norm": 3.359189510345459, "learning_rate": 4.530030173543334e-06, "loss": 0.9535, "step": 7058 }, { "epoch": 0.5441720629047179, "grad_norm": 3.495391368865967, "learning_rate": 4.528787268125641e-06, "loss": 0.8966, "step": 7059 }, { "epoch": 0.5442491520197348, "grad_norm": 3.6998205184936523, "learning_rate": 4.527544392085622e-06, "loss": 0.9623, "step": 7060 }, { "epoch": 0.5443262411347518, "grad_norm": 3.4941718578338623, "learning_rate": 4.526301545500762e-06, "loss": 1.0587, "step": 7061 }, { "epoch": 0.5444033302497687, "grad_norm": 3.546320676803589, "learning_rate": 4.5250587284485474e-06, "loss": 0.9509, "step": 7062 }, { "epoch": 0.5444804193647856, "grad_norm": 3.8419322967529297, 
"learning_rate": 4.523815941006463e-06, "loss": 0.9845, "step": 7063 }, { "epoch": 0.5445575084798027, "grad_norm": 3.7636489868164062, "learning_rate": 4.522573183251987e-06, "loss": 1.0808, "step": 7064 }, { "epoch": 0.5446345975948196, "grad_norm": 3.6135027408599854, "learning_rate": 4.5213304552626e-06, "loss": 0.9379, "step": 7065 }, { "epoch": 0.5447116867098366, "grad_norm": 4.0867414474487305, "learning_rate": 4.5200877571157815e-06, "loss": 0.969, "step": 7066 }, { "epoch": 0.5447887758248535, "grad_norm": 3.768380641937256, "learning_rate": 4.518845088889004e-06, "loss": 1.017, "step": 7067 }, { "epoch": 0.5448658649398704, "grad_norm": 3.526736259460449, "learning_rate": 4.517602450659746e-06, "loss": 0.8962, "step": 7068 }, { "epoch": 0.5449429540548875, "grad_norm": 3.6190271377563477, "learning_rate": 4.516359842505475e-06, "loss": 0.9257, "step": 7069 }, { "epoch": 0.5450200431699044, "grad_norm": 3.3780858516693115, "learning_rate": 4.515117264503662e-06, "loss": 0.9142, "step": 7070 }, { "epoch": 0.5450971322849214, "grad_norm": 3.3571228981018066, "learning_rate": 4.513874716731778e-06, "loss": 0.9285, "step": 7071 }, { "epoch": 0.5451742213999383, "grad_norm": 3.818427801132202, "learning_rate": 4.5126321992672866e-06, "loss": 0.928, "step": 7072 }, { "epoch": 0.5452513105149552, "grad_norm": 3.895630359649658, "learning_rate": 4.511389712187654e-06, "loss": 0.9358, "step": 7073 }, { "epoch": 0.5453283996299723, "grad_norm": 3.877424955368042, "learning_rate": 4.510147255570342e-06, "loss": 1.0102, "step": 7074 }, { "epoch": 0.5454054887449892, "grad_norm": 3.911123752593994, "learning_rate": 4.508904829492812e-06, "loss": 0.9505, "step": 7075 }, { "epoch": 0.5454825778600062, "grad_norm": 3.7010762691497803, "learning_rate": 4.507662434032522e-06, "loss": 0.98, "step": 7076 }, { "epoch": 0.5455596669750231, "grad_norm": 3.6135289669036865, "learning_rate": 4.50642006926693e-06, "loss": 0.9494, "step": 7077 }, { "epoch": 0.54563675609004, "grad_norm": 3.883880138397217, "learning_rate": 4.505177735273489e-06, "loss": 0.9959, "step": 7078 }, { "epoch": 0.5457138452050571, "grad_norm": 3.969799518585205, "learning_rate": 4.503935432129656e-06, "loss": 0.9272, "step": 7079 }, { "epoch": 0.545790934320074, "grad_norm": 4.497955799102783, "learning_rate": 4.5026931599128774e-06, "loss": 0.9667, "step": 7080 }, { "epoch": 0.545868023435091, "grad_norm": 3.585750102996826, "learning_rate": 4.501450918700605e-06, "loss": 0.9303, "step": 7081 }, { "epoch": 0.5459451125501079, "grad_norm": 3.6245148181915283, "learning_rate": 4.5002087085702865e-06, "loss": 0.9388, "step": 7082 }, { "epoch": 0.5460222016651248, "grad_norm": 3.6533985137939453, "learning_rate": 4.498966529599365e-06, "loss": 1.0059, "step": 7083 }, { "epoch": 0.5460992907801419, "grad_norm": 3.751574754714966, "learning_rate": 4.497724381865287e-06, "loss": 0.9515, "step": 7084 }, { "epoch": 0.5461763798951588, "grad_norm": 3.4590702056884766, "learning_rate": 4.496482265445492e-06, "loss": 0.9293, "step": 7085 }, { "epoch": 0.5462534690101758, "grad_norm": 3.8915021419525146, "learning_rate": 4.49524018041742e-06, "loss": 1.051, "step": 7086 }, { "epoch": 0.5463305581251927, "grad_norm": 4.0933990478515625, "learning_rate": 4.493998126858508e-06, "loss": 0.9782, "step": 7087 }, { "epoch": 0.5464076472402097, "grad_norm": 3.402745485305786, "learning_rate": 4.492756104846193e-06, "loss": 0.9318, "step": 7088 }, { "epoch": 0.5464847363552267, "grad_norm": 3.519670009613037, "learning_rate": 4.491514114457907e-06, 
"loss": 0.9108, "step": 7089 }, { "epoch": 0.5465618254702436, "grad_norm": 3.736721992492676, "learning_rate": 4.490272155771084e-06, "loss": 0.8759, "step": 7090 }, { "epoch": 0.5466389145852606, "grad_norm": 3.819697141647339, "learning_rate": 4.489030228863151e-06, "loss": 1.0047, "step": 7091 }, { "epoch": 0.5467160037002775, "grad_norm": 3.8831934928894043, "learning_rate": 4.487788333811536e-06, "loss": 0.9225, "step": 7092 }, { "epoch": 0.5467930928152945, "grad_norm": 3.722339153289795, "learning_rate": 4.486546470693667e-06, "loss": 0.8511, "step": 7093 }, { "epoch": 0.5468701819303114, "grad_norm": 3.325698137283325, "learning_rate": 4.4853046395869655e-06, "loss": 0.8824, "step": 7094 }, { "epoch": 0.5469472710453284, "grad_norm": 3.8227202892303467, "learning_rate": 4.484062840568854e-06, "loss": 0.918, "step": 7095 }, { "epoch": 0.5470243601603454, "grad_norm": 3.414175033569336, "learning_rate": 4.4828210737167535e-06, "loss": 0.9069, "step": 7096 }, { "epoch": 0.5471014492753623, "grad_norm": 3.996797800064087, "learning_rate": 4.481579339108079e-06, "loss": 1.0326, "step": 7097 }, { "epoch": 0.5471785383903793, "grad_norm": 3.6179771423339844, "learning_rate": 4.480337636820249e-06, "loss": 0.9529, "step": 7098 }, { "epoch": 0.5472556275053962, "grad_norm": 3.6945784091949463, "learning_rate": 4.479095966930674e-06, "loss": 1.1146, "step": 7099 }, { "epoch": 0.5473327166204132, "grad_norm": 3.5141420364379883, "learning_rate": 4.477854329516769e-06, "loss": 0.8478, "step": 7100 }, { "epoch": 0.5474098057354302, "grad_norm": 3.9102392196655273, "learning_rate": 4.476612724655943e-06, "loss": 0.9833, "step": 7101 }, { "epoch": 0.5474868948504471, "grad_norm": 3.6566107273101807, "learning_rate": 4.475371152425599e-06, "loss": 1.0023, "step": 7102 }, { "epoch": 0.5475639839654641, "grad_norm": 3.598893880844116, "learning_rate": 4.47412961290315e-06, "loss": 0.9257, "step": 7103 }, { "epoch": 0.547641073080481, "grad_norm": 3.900968313217163, "learning_rate": 4.472888106165995e-06, "loss": 0.9892, "step": 7104 }, { "epoch": 0.547718162195498, "grad_norm": 3.7693870067596436, "learning_rate": 4.471646632291539e-06, "loss": 0.9436, "step": 7105 }, { "epoch": 0.547795251310515, "grad_norm": 3.838836193084717, "learning_rate": 4.470405191357176e-06, "loss": 0.9466, "step": 7106 }, { "epoch": 0.5478723404255319, "grad_norm": 3.1654272079467773, "learning_rate": 4.469163783440308e-06, "loss": 0.823, "step": 7107 }, { "epoch": 0.5479494295405489, "grad_norm": 3.3918416500091553, "learning_rate": 4.467922408618329e-06, "loss": 0.9723, "step": 7108 }, { "epoch": 0.5480265186555658, "grad_norm": 4.015628814697266, "learning_rate": 4.466681066968632e-06, "loss": 0.9698, "step": 7109 }, { "epoch": 0.5481036077705828, "grad_norm": 3.8677635192871094, "learning_rate": 4.465439758568607e-06, "loss": 0.9895, "step": 7110 }, { "epoch": 0.5481806968855998, "grad_norm": 3.577852249145508, "learning_rate": 4.464198483495647e-06, "loss": 0.9398, "step": 7111 }, { "epoch": 0.5482577860006167, "grad_norm": 3.617177724838257, "learning_rate": 4.462957241827135e-06, "loss": 0.9324, "step": 7112 }, { "epoch": 0.5483348751156337, "grad_norm": 3.606205463409424, "learning_rate": 4.461716033640457e-06, "loss": 0.916, "step": 7113 }, { "epoch": 0.5484119642306506, "grad_norm": 3.655104637145996, "learning_rate": 4.460474859012998e-06, "loss": 0.9226, "step": 7114 }, { "epoch": 0.5484890533456676, "grad_norm": 3.676701068878174, "learning_rate": 4.4592337180221365e-06, "loss": 0.9754, "step": 7115 }, { 
"epoch": 0.5485661424606846, "grad_norm": 3.2556674480438232, "learning_rate": 4.457992610745252e-06, "loss": 0.8332, "step": 7116 }, { "epoch": 0.5486432315757015, "grad_norm": 3.65635347366333, "learning_rate": 4.456751537259721e-06, "loss": 0.9277, "step": 7117 }, { "epoch": 0.5487203206907185, "grad_norm": 3.583652973175049, "learning_rate": 4.455510497642918e-06, "loss": 0.9282, "step": 7118 }, { "epoch": 0.5487974098057354, "grad_norm": 3.441347599029541, "learning_rate": 4.454269491972215e-06, "loss": 0.8934, "step": 7119 }, { "epoch": 0.5488744989207524, "grad_norm": 3.9320881366729736, "learning_rate": 4.453028520324984e-06, "loss": 0.9556, "step": 7120 }, { "epoch": 0.5489515880357694, "grad_norm": 3.3631417751312256, "learning_rate": 4.4517875827785905e-06, "loss": 0.9234, "step": 7121 }, { "epoch": 0.5490286771507863, "grad_norm": 3.7885899543762207, "learning_rate": 4.450546679410403e-06, "loss": 1.0157, "step": 7122 }, { "epoch": 0.5491057662658033, "grad_norm": 3.6120402812957764, "learning_rate": 4.449305810297784e-06, "loss": 0.8965, "step": 7123 }, { "epoch": 0.5491828553808202, "grad_norm": 3.5970468521118164, "learning_rate": 4.448064975518096e-06, "loss": 0.9291, "step": 7124 }, { "epoch": 0.5492599444958371, "grad_norm": 3.6783077716827393, "learning_rate": 4.446824175148699e-06, "loss": 0.82, "step": 7125 }, { "epoch": 0.5493370336108542, "grad_norm": 3.4367566108703613, "learning_rate": 4.445583409266947e-06, "loss": 1.0236, "step": 7126 }, { "epoch": 0.5494141227258711, "grad_norm": 4.298835277557373, "learning_rate": 4.444342677950201e-06, "loss": 1.0732, "step": 7127 }, { "epoch": 0.5494912118408881, "grad_norm": 3.573850631713867, "learning_rate": 4.443101981275809e-06, "loss": 0.9049, "step": 7128 }, { "epoch": 0.549568300955905, "grad_norm": 3.9243526458740234, "learning_rate": 4.441861319321125e-06, "loss": 1.0487, "step": 7129 }, { "epoch": 0.549645390070922, "grad_norm": 3.5670223236083984, "learning_rate": 4.440620692163498e-06, "loss": 0.9543, "step": 7130 }, { "epoch": 0.549722479185939, "grad_norm": 5.046988010406494, "learning_rate": 4.439380099880272e-06, "loss": 1.1399, "step": 7131 }, { "epoch": 0.5497995683009559, "grad_norm": 4.248262405395508, "learning_rate": 4.438139542548795e-06, "loss": 0.9838, "step": 7132 }, { "epoch": 0.5498766574159729, "grad_norm": 3.758612632751465, "learning_rate": 4.436899020246407e-06, "loss": 0.8835, "step": 7133 }, { "epoch": 0.5499537465309898, "grad_norm": 3.501319408416748, "learning_rate": 4.435658533050448e-06, "loss": 0.8992, "step": 7134 }, { "epoch": 0.5500308356460067, "grad_norm": 3.7768168449401855, "learning_rate": 4.434418081038256e-06, "loss": 0.9216, "step": 7135 }, { "epoch": 0.5501079247610238, "grad_norm": 3.8390257358551025, "learning_rate": 4.43317766428717e-06, "loss": 0.997, "step": 7136 }, { "epoch": 0.5501850138760407, "grad_norm": 3.9735095500946045, "learning_rate": 4.4319372828745185e-06, "loss": 1.0097, "step": 7137 }, { "epoch": 0.5502621029910577, "grad_norm": 3.6092162132263184, "learning_rate": 4.430696936877638e-06, "loss": 0.9526, "step": 7138 }, { "epoch": 0.5503391921060746, "grad_norm": 3.396688461303711, "learning_rate": 4.429456626373853e-06, "loss": 0.8063, "step": 7139 }, { "epoch": 0.5504162812210915, "grad_norm": 3.1804189682006836, "learning_rate": 4.428216351440492e-06, "loss": 0.733, "step": 7140 }, { "epoch": 0.5504933703361086, "grad_norm": 3.653811454772949, "learning_rate": 4.4269761121548815e-06, "loss": 1.1142, "step": 7141 }, { "epoch": 0.5505704594511255, 
"grad_norm": 3.4570720195770264, "learning_rate": 4.425735908594342e-06, "loss": 0.8599, "step": 7142 }, { "epoch": 0.5506475485661425, "grad_norm": 3.407050609588623, "learning_rate": 4.424495740836193e-06, "loss": 0.8526, "step": 7143 }, { "epoch": 0.5507246376811594, "grad_norm": 4.278409481048584, "learning_rate": 4.4232556089577575e-06, "loss": 1.0937, "step": 7144 }, { "epoch": 0.5508017267961763, "grad_norm": 3.6288211345672607, "learning_rate": 4.4220155130363445e-06, "loss": 1.0142, "step": 7145 }, { "epoch": 0.5508788159111934, "grad_norm": 4.041045188903809, "learning_rate": 4.420775453149273e-06, "loss": 0.955, "step": 7146 }, { "epoch": 0.5509559050262103, "grad_norm": 3.636233329772949, "learning_rate": 4.4195354293738484e-06, "loss": 0.937, "step": 7147 }, { "epoch": 0.5510329941412273, "grad_norm": 3.6226770877838135, "learning_rate": 4.418295441787387e-06, "loss": 0.9552, "step": 7148 }, { "epoch": 0.5511100832562442, "grad_norm": 3.7020938396453857, "learning_rate": 4.417055490467191e-06, "loss": 1.002, "step": 7149 }, { "epoch": 0.5511871723712611, "grad_norm": 3.8105432987213135, "learning_rate": 4.4158155754905655e-06, "loss": 0.9974, "step": 7150 }, { "epoch": 0.5512642614862782, "grad_norm": 3.620668888092041, "learning_rate": 4.414575696934814e-06, "loss": 0.9996, "step": 7151 }, { "epoch": 0.5513413506012951, "grad_norm": 3.2780075073242188, "learning_rate": 4.413335854877237e-06, "loss": 0.9061, "step": 7152 }, { "epoch": 0.5514184397163121, "grad_norm": 3.7314953804016113, "learning_rate": 4.41209604939513e-06, "loss": 1.008, "step": 7153 }, { "epoch": 0.551495528831329, "grad_norm": 3.6991615295410156, "learning_rate": 4.4108562805657886e-06, "loss": 0.9696, "step": 7154 }, { "epoch": 0.5515726179463459, "grad_norm": 3.4623279571533203, "learning_rate": 4.409616548466508e-06, "loss": 0.8454, "step": 7155 }, { "epoch": 0.551649707061363, "grad_norm": 3.6211092472076416, "learning_rate": 4.408376853174578e-06, "loss": 0.9226, "step": 7156 }, { "epoch": 0.5517267961763799, "grad_norm": 3.806649684906006, "learning_rate": 4.407137194767286e-06, "loss": 0.9433, "step": 7157 }, { "epoch": 0.5518038852913969, "grad_norm": 3.5186119079589844, "learning_rate": 4.40589757332192e-06, "loss": 0.9126, "step": 7158 }, { "epoch": 0.5518809744064138, "grad_norm": 3.941894054412842, "learning_rate": 4.404657988915764e-06, "loss": 1.053, "step": 7159 }, { "epoch": 0.5519580635214307, "grad_norm": 3.503800868988037, "learning_rate": 4.4034184416260975e-06, "loss": 0.8895, "step": 7160 }, { "epoch": 0.5520351526364478, "grad_norm": 3.7842636108398438, "learning_rate": 4.402178931530202e-06, "loss": 0.9407, "step": 7161 }, { "epoch": 0.5521122417514647, "grad_norm": 3.6619977951049805, "learning_rate": 4.400939458705356e-06, "loss": 0.9676, "step": 7162 }, { "epoch": 0.5521893308664817, "grad_norm": 3.5588581562042236, "learning_rate": 4.3997000232288295e-06, "loss": 0.8884, "step": 7163 }, { "epoch": 0.5522664199814986, "grad_norm": 3.8872478008270264, "learning_rate": 4.398460625177899e-06, "loss": 1.0666, "step": 7164 }, { "epoch": 0.5523435090965155, "grad_norm": 3.6066267490386963, "learning_rate": 4.397221264629833e-06, "loss": 0.8131, "step": 7165 }, { "epoch": 0.5524205982115326, "grad_norm": 4.402515411376953, "learning_rate": 4.395981941661897e-06, "loss": 0.9393, "step": 7166 }, { "epoch": 0.5524976873265495, "grad_norm": 3.6887872219085693, "learning_rate": 4.394742656351362e-06, "loss": 0.8782, "step": 7167 }, { "epoch": 0.5525747764415665, "grad_norm": 
3.382742404937744, "learning_rate": 4.393503408775485e-06, "loss": 0.9317, "step": 7168 }, { "epoch": 0.5526518655565834, "grad_norm": 3.7242555618286133, "learning_rate": 4.39226419901153e-06, "loss": 0.9919, "step": 7169 }, { "epoch": 0.5527289546716003, "grad_norm": 3.5612120628356934, "learning_rate": 4.391025027136756e-06, "loss": 1.0148, "step": 7170 }, { "epoch": 0.5528060437866174, "grad_norm": 3.597330331802368, "learning_rate": 4.389785893228416e-06, "loss": 0.9333, "step": 7171 }, { "epoch": 0.5528831329016343, "grad_norm": 3.885812520980835, "learning_rate": 4.388546797363767e-06, "loss": 0.949, "step": 7172 }, { "epoch": 0.5529602220166513, "grad_norm": 3.399912118911743, "learning_rate": 4.387307739620057e-06, "loss": 0.906, "step": 7173 }, { "epoch": 0.5530373111316682, "grad_norm": 3.6478137969970703, "learning_rate": 4.386068720074536e-06, "loss": 0.9246, "step": 7174 }, { "epoch": 0.5531144002466851, "grad_norm": 3.986933946609497, "learning_rate": 4.384829738804452e-06, "loss": 0.9272, "step": 7175 }, { "epoch": 0.5531914893617021, "grad_norm": 3.6242878437042236, "learning_rate": 4.383590795887046e-06, "loss": 0.9373, "step": 7176 }, { "epoch": 0.5532685784767191, "grad_norm": 3.7404351234436035, "learning_rate": 4.382351891399562e-06, "loss": 0.9936, "step": 7177 }, { "epoch": 0.5533456675917361, "grad_norm": 3.527928590774536, "learning_rate": 4.38111302541924e-06, "loss": 0.947, "step": 7178 }, { "epoch": 0.553422756706753, "grad_norm": 3.5108187198638916, "learning_rate": 4.379874198023314e-06, "loss": 0.8112, "step": 7179 }, { "epoch": 0.5534998458217699, "grad_norm": 3.2686216831207275, "learning_rate": 4.37863540928902e-06, "loss": 0.8961, "step": 7180 }, { "epoch": 0.553576934936787, "grad_norm": 3.910783290863037, "learning_rate": 4.3773966592935914e-06, "loss": 1.0448, "step": 7181 }, { "epoch": 0.5536540240518039, "grad_norm": 3.619373083114624, "learning_rate": 4.3761579481142555e-06, "loss": 0.9193, "step": 7182 }, { "epoch": 0.5537311131668209, "grad_norm": 3.356902837753296, "learning_rate": 4.374919275828241e-06, "loss": 0.8701, "step": 7183 }, { "epoch": 0.5538082022818378, "grad_norm": 4.081925392150879, "learning_rate": 4.373680642512773e-06, "loss": 1.0359, "step": 7184 }, { "epoch": 0.5538852913968547, "grad_norm": 3.6913883686065674, "learning_rate": 4.372442048245071e-06, "loss": 1.0076, "step": 7185 }, { "epoch": 0.5539623805118717, "grad_norm": 3.53983736038208, "learning_rate": 4.371203493102359e-06, "loss": 0.9063, "step": 7186 }, { "epoch": 0.5540394696268887, "grad_norm": 3.321823835372925, "learning_rate": 4.369964977161853e-06, "loss": 0.8075, "step": 7187 }, { "epoch": 0.5541165587419057, "grad_norm": 3.6654202938079834, "learning_rate": 4.368726500500765e-06, "loss": 0.9791, "step": 7188 }, { "epoch": 0.5541936478569226, "grad_norm": 3.718724012374878, "learning_rate": 4.367488063196313e-06, "loss": 0.9182, "step": 7189 }, { "epoch": 0.5542707369719395, "grad_norm": 3.7318592071533203, "learning_rate": 4.366249665325703e-06, "loss": 0.9435, "step": 7190 }, { "epoch": 0.5543478260869565, "grad_norm": 3.3900840282440186, "learning_rate": 4.3650113069661426e-06, "loss": 0.7534, "step": 7191 }, { "epoch": 0.5544249152019735, "grad_norm": 3.4240376949310303, "learning_rate": 4.36377298819484e-06, "loss": 0.8855, "step": 7192 }, { "epoch": 0.5545020043169905, "grad_norm": 4.262863636016846, "learning_rate": 4.3625347090889975e-06, "loss": 0.9735, "step": 7193 }, { "epoch": 0.5545790934320074, "grad_norm": 4.42769193649292, "learning_rate": 
4.361296469725813e-06, "loss": 1.1063, "step": 7194 }, { "epoch": 0.5546561825470243, "grad_norm": 4.841814041137695, "learning_rate": 4.360058270182486e-06, "loss": 0.9777, "step": 7195 }, { "epoch": 0.5547332716620413, "grad_norm": 3.457386016845703, "learning_rate": 4.3588201105362125e-06, "loss": 0.9072, "step": 7196 }, { "epoch": 0.5548103607770583, "grad_norm": 3.6573784351348877, "learning_rate": 4.357581990864183e-06, "loss": 0.8965, "step": 7197 }, { "epoch": 0.5548874498920753, "grad_norm": 3.5548505783081055, "learning_rate": 4.3563439112435904e-06, "loss": 0.7159, "step": 7198 }, { "epoch": 0.5549645390070922, "grad_norm": 3.5277884006500244, "learning_rate": 4.355105871751621e-06, "loss": 1.0291, "step": 7199 }, { "epoch": 0.5550416281221091, "grad_norm": 3.4423916339874268, "learning_rate": 4.35386787246546e-06, "loss": 0.8138, "step": 7200 }, { "epoch": 0.5551187172371261, "grad_norm": 4.191361904144287, "learning_rate": 4.352629913462292e-06, "loss": 0.9909, "step": 7201 }, { "epoch": 0.555195806352143, "grad_norm": 3.601893663406372, "learning_rate": 4.351391994819294e-06, "loss": 0.9646, "step": 7202 }, { "epoch": 0.5552728954671601, "grad_norm": 3.6490609645843506, "learning_rate": 4.350154116613648e-06, "loss": 0.9988, "step": 7203 }, { "epoch": 0.555349984582177, "grad_norm": 3.3986620903015137, "learning_rate": 4.348916278922526e-06, "loss": 0.9039, "step": 7204 }, { "epoch": 0.5554270736971939, "grad_norm": 4.264974117279053, "learning_rate": 4.347678481823102e-06, "loss": 1.0964, "step": 7205 }, { "epoch": 0.5555041628122109, "grad_norm": 4.056002140045166, "learning_rate": 4.346440725392546e-06, "loss": 1.044, "step": 7206 }, { "epoch": 0.5555812519272278, "grad_norm": 3.6536853313446045, "learning_rate": 4.345203009708027e-06, "loss": 1.0003, "step": 7207 }, { "epoch": 0.5556583410422449, "grad_norm": 3.690647840499878, "learning_rate": 4.343965334846708e-06, "loss": 0.9672, "step": 7208 }, { "epoch": 0.5557354301572618, "grad_norm": 3.597975969314575, "learning_rate": 4.3427277008857526e-06, "loss": 0.9204, "step": 7209 }, { "epoch": 0.5558125192722787, "grad_norm": 3.5312154293060303, "learning_rate": 4.341490107902323e-06, "loss": 0.9343, "step": 7210 }, { "epoch": 0.5558896083872957, "grad_norm": 3.8047034740448, "learning_rate": 4.340252555973572e-06, "loss": 1.0683, "step": 7211 }, { "epoch": 0.5559666975023126, "grad_norm": 3.7284018993377686, "learning_rate": 4.339015045176659e-06, "loss": 0.8455, "step": 7212 }, { "epoch": 0.5560437866173297, "grad_norm": 4.019394397735596, "learning_rate": 4.3377775755887335e-06, "loss": 1.0309, "step": 7213 }, { "epoch": 0.5561208757323466, "grad_norm": 4.077296733856201, "learning_rate": 4.336540147286946e-06, "loss": 1.0705, "step": 7214 }, { "epoch": 0.5561979648473635, "grad_norm": 3.3389627933502197, "learning_rate": 4.335302760348446e-06, "loss": 0.8988, "step": 7215 }, { "epoch": 0.5562750539623805, "grad_norm": 3.577312469482422, "learning_rate": 4.334065414850375e-06, "loss": 0.9091, "step": 7216 }, { "epoch": 0.5563521430773974, "grad_norm": 3.472486734390259, "learning_rate": 4.3328281108698765e-06, "loss": 0.9721, "step": 7217 }, { "epoch": 0.5564292321924145, "grad_norm": 4.345758438110352, "learning_rate": 4.33159084848409e-06, "loss": 1.0537, "step": 7218 }, { "epoch": 0.5565063213074314, "grad_norm": 3.5897371768951416, "learning_rate": 4.330353627770153e-06, "loss": 0.8934, "step": 7219 }, { "epoch": 0.5565834104224483, "grad_norm": 4.042248725891113, "learning_rate": 4.329116448805198e-06, "loss": 
1.1139, "step": 7220 }, { "epoch": 0.5566604995374653, "grad_norm": 4.091678619384766, "learning_rate": 4.327879311666359e-06, "loss": 0.9429, "step": 7221 }, { "epoch": 0.5567375886524822, "grad_norm": 3.7830512523651123, "learning_rate": 4.326642216430763e-06, "loss": 1.0191, "step": 7222 }, { "epoch": 0.5568146777674993, "grad_norm": 3.766197919845581, "learning_rate": 4.325405163175539e-06, "loss": 0.983, "step": 7223 }, { "epoch": 0.5568917668825162, "grad_norm": 3.5098490715026855, "learning_rate": 4.3241681519778074e-06, "loss": 0.9068, "step": 7224 }, { "epoch": 0.5569688559975331, "grad_norm": 3.6200573444366455, "learning_rate": 4.322931182914692e-06, "loss": 0.8358, "step": 7225 }, { "epoch": 0.5570459451125501, "grad_norm": 3.716994285583496, "learning_rate": 4.321694256063311e-06, "loss": 1.0499, "step": 7226 }, { "epoch": 0.557123034227567, "grad_norm": 3.880415439605713, "learning_rate": 4.3204573715007795e-06, "loss": 0.9322, "step": 7227 }, { "epoch": 0.5572001233425841, "grad_norm": 3.300236940383911, "learning_rate": 4.3192205293042105e-06, "loss": 0.8236, "step": 7228 }, { "epoch": 0.557277212457601, "grad_norm": 4.212109565734863, "learning_rate": 4.317983729550718e-06, "loss": 0.9395, "step": 7229 }, { "epoch": 0.5573543015726179, "grad_norm": 3.5828144550323486, "learning_rate": 4.316746972317406e-06, "loss": 1.0149, "step": 7230 }, { "epoch": 0.5574313906876349, "grad_norm": 3.6307485103607178, "learning_rate": 4.315510257681381e-06, "loss": 1.0208, "step": 7231 }, { "epoch": 0.5575084798026518, "grad_norm": 3.832481622695923, "learning_rate": 4.314273585719749e-06, "loss": 1.0138, "step": 7232 }, { "epoch": 0.5575855689176689, "grad_norm": 3.533522367477417, "learning_rate": 4.313036956509605e-06, "loss": 1.0133, "step": 7233 }, { "epoch": 0.5576626580326858, "grad_norm": 3.565697193145752, "learning_rate": 4.3118003701280504e-06, "loss": 1.0029, "step": 7234 }, { "epoch": 0.5577397471477027, "grad_norm": 3.788064956665039, "learning_rate": 4.310563826652175e-06, "loss": 0.9481, "step": 7235 }, { "epoch": 0.5578168362627197, "grad_norm": 3.4550724029541016, "learning_rate": 4.3093273261590785e-06, "loss": 0.8953, "step": 7236 }, { "epoch": 0.5578939253777366, "grad_norm": 3.3875648975372314, "learning_rate": 4.308090868725844e-06, "loss": 0.928, "step": 7237 }, { "epoch": 0.5579710144927537, "grad_norm": 4.244081497192383, "learning_rate": 4.306854454429561e-06, "loss": 0.9824, "step": 7238 }, { "epoch": 0.5580481036077706, "grad_norm": 3.3602986335754395, "learning_rate": 4.3056180833473135e-06, "loss": 0.812, "step": 7239 }, { "epoch": 0.5581251927227875, "grad_norm": 3.6101772785186768, "learning_rate": 4.304381755556182e-06, "loss": 0.8871, "step": 7240 }, { "epoch": 0.5582022818378045, "grad_norm": 3.709418535232544, "learning_rate": 4.303145471133246e-06, "loss": 0.9672, "step": 7241 }, { "epoch": 0.5582793709528214, "grad_norm": 3.9773929119110107, "learning_rate": 4.301909230155579e-06, "loss": 0.9546, "step": 7242 }, { "epoch": 0.5583564600678385, "grad_norm": 3.4679555892944336, "learning_rate": 4.300673032700256e-06, "loss": 0.8538, "step": 7243 }, { "epoch": 0.5584335491828554, "grad_norm": 3.525493860244751, "learning_rate": 4.2994368788443496e-06, "loss": 0.9495, "step": 7244 }, { "epoch": 0.5585106382978723, "grad_norm": 3.462528705596924, "learning_rate": 4.298200768664924e-06, "loss": 0.9793, "step": 7245 }, { "epoch": 0.5585877274128893, "grad_norm": 3.402458906173706, "learning_rate": 4.296964702239046e-06, "loss": 0.8481, "step": 7246 }, { 
"epoch": 0.5586648165279062, "grad_norm": 3.4688830375671387, "learning_rate": 4.295728679643778e-06, "loss": 0.898, "step": 7247 }, { "epoch": 0.5587419056429233, "grad_norm": 3.5132532119750977, "learning_rate": 4.2944927009561786e-06, "loss": 0.9714, "step": 7248 }, { "epoch": 0.5588189947579402, "grad_norm": 3.7128775119781494, "learning_rate": 4.293256766253307e-06, "loss": 1.0389, "step": 7249 }, { "epoch": 0.5588960838729571, "grad_norm": 3.6854405403137207, "learning_rate": 4.2920208756122136e-06, "loss": 0.9308, "step": 7250 }, { "epoch": 0.5589731729879741, "grad_norm": 3.7184994220733643, "learning_rate": 4.290785029109953e-06, "loss": 0.9581, "step": 7251 }, { "epoch": 0.559050262102991, "grad_norm": 3.4944541454315186, "learning_rate": 4.2895492268235725e-06, "loss": 0.9439, "step": 7252 }, { "epoch": 0.559127351218008, "grad_norm": 3.616788387298584, "learning_rate": 4.288313468830119e-06, "loss": 0.8744, "step": 7253 }, { "epoch": 0.559204440333025, "grad_norm": 3.7554306983947754, "learning_rate": 4.2870777552066336e-06, "loss": 0.8796, "step": 7254 }, { "epoch": 0.559281529448042, "grad_norm": 3.7401881217956543, "learning_rate": 4.285842086030159e-06, "loss": 0.8975, "step": 7255 }, { "epoch": 0.5593586185630589, "grad_norm": 3.8018808364868164, "learning_rate": 4.28460646137773e-06, "loss": 0.827, "step": 7256 }, { "epoch": 0.5594357076780758, "grad_norm": 3.6006662845611572, "learning_rate": 4.283370881326384e-06, "loss": 0.9216, "step": 7257 }, { "epoch": 0.5595127967930928, "grad_norm": 3.953252077102661, "learning_rate": 4.2821353459531525e-06, "loss": 0.9649, "step": 7258 }, { "epoch": 0.5595898859081098, "grad_norm": 3.724891185760498, "learning_rate": 4.2808998553350625e-06, "loss": 1.0039, "step": 7259 }, { "epoch": 0.5596669750231268, "grad_norm": 3.795192241668701, "learning_rate": 4.279664409549145e-06, "loss": 0.9401, "step": 7260 }, { "epoch": 0.5597440641381437, "grad_norm": 4.127142906188965, "learning_rate": 4.2784290086724186e-06, "loss": 0.9922, "step": 7261 }, { "epoch": 0.5598211532531606, "grad_norm": 3.5862457752227783, "learning_rate": 4.277193652781906e-06, "loss": 0.9433, "step": 7262 }, { "epoch": 0.5598982423681776, "grad_norm": 4.025807857513428, "learning_rate": 4.275958341954628e-06, "loss": 0.8679, "step": 7263 }, { "epoch": 0.5599753314831946, "grad_norm": 3.7022628784179688, "learning_rate": 4.274723076267596e-06, "loss": 0.8451, "step": 7264 }, { "epoch": 0.5600524205982116, "grad_norm": 3.6183743476867676, "learning_rate": 4.273487855797823e-06, "loss": 0.8063, "step": 7265 }, { "epoch": 0.5601295097132285, "grad_norm": 3.7264175415039062, "learning_rate": 4.272252680622321e-06, "loss": 0.8843, "step": 7266 }, { "epoch": 0.5602065988282454, "grad_norm": 3.512242317199707, "learning_rate": 4.271017550818095e-06, "loss": 0.925, "step": 7267 }, { "epoch": 0.5602836879432624, "grad_norm": 3.9300243854522705, "learning_rate": 4.269782466462149e-06, "loss": 0.949, "step": 7268 }, { "epoch": 0.5603607770582794, "grad_norm": 3.0627076625823975, "learning_rate": 4.268547427631485e-06, "loss": 0.8389, "step": 7269 }, { "epoch": 0.5604378661732964, "grad_norm": 3.468635082244873, "learning_rate": 4.267312434403099e-06, "loss": 0.911, "step": 7270 }, { "epoch": 0.5605149552883133, "grad_norm": 3.891932249069214, "learning_rate": 4.26607748685399e-06, "loss": 0.9965, "step": 7271 }, { "epoch": 0.5605920444033302, "grad_norm": 3.7001166343688965, "learning_rate": 4.2648425850611465e-06, "loss": 0.8025, "step": 7272 }, { "epoch": 0.5606691335183472, 
"grad_norm": 3.481006145477295, "learning_rate": 4.2636077291015615e-06, "loss": 0.8563, "step": 7273 }, { "epoch": 0.5607462226333642, "grad_norm": 3.4894566535949707, "learning_rate": 4.262372919052221e-06, "loss": 0.9797, "step": 7274 }, { "epoch": 0.5608233117483812, "grad_norm": 3.728573799133301, "learning_rate": 4.2611381549901085e-06, "loss": 0.8734, "step": 7275 }, { "epoch": 0.5609004008633981, "grad_norm": 3.5050055980682373, "learning_rate": 4.259903436992204e-06, "loss": 0.8772, "step": 7276 }, { "epoch": 0.560977489978415, "grad_norm": 3.7864038944244385, "learning_rate": 4.258668765135489e-06, "loss": 0.939, "step": 7277 }, { "epoch": 0.561054579093432, "grad_norm": 3.818662405014038, "learning_rate": 4.257434139496937e-06, "loss": 1.0586, "step": 7278 }, { "epoch": 0.561131668208449, "grad_norm": 4.216384410858154, "learning_rate": 4.25619956015352e-06, "loss": 1.0841, "step": 7279 }, { "epoch": 0.561208757323466, "grad_norm": 3.877438545227051, "learning_rate": 4.254965027182206e-06, "loss": 0.8282, "step": 7280 }, { "epoch": 0.5612858464384829, "grad_norm": 3.9610254764556885, "learning_rate": 4.253730540659966e-06, "loss": 1.0798, "step": 7281 }, { "epoch": 0.5613629355534998, "grad_norm": 3.5456466674804688, "learning_rate": 4.252496100663762e-06, "loss": 0.8978, "step": 7282 }, { "epoch": 0.5614400246685168, "grad_norm": 3.5786006450653076, "learning_rate": 4.2512617072705546e-06, "loss": 0.956, "step": 7283 }, { "epoch": 0.5615171137835338, "grad_norm": 3.5269174575805664, "learning_rate": 4.250027360557302e-06, "loss": 0.8546, "step": 7284 }, { "epoch": 0.5615942028985508, "grad_norm": 3.479971408843994, "learning_rate": 4.248793060600959e-06, "loss": 0.832, "step": 7285 }, { "epoch": 0.5616712920135677, "grad_norm": 3.701209783554077, "learning_rate": 4.247558807478478e-06, "loss": 1.0244, "step": 7286 }, { "epoch": 0.5617483811285846, "grad_norm": 3.449061393737793, "learning_rate": 4.246324601266809e-06, "loss": 0.8604, "step": 7287 }, { "epoch": 0.5618254702436016, "grad_norm": 3.8695156574249268, "learning_rate": 4.245090442042897e-06, "loss": 1.0627, "step": 7288 }, { "epoch": 0.5619025593586185, "grad_norm": 3.7182259559631348, "learning_rate": 4.243856329883687e-06, "loss": 0.9507, "step": 7289 }, { "epoch": 0.5619796484736356, "grad_norm": 3.4913182258605957, "learning_rate": 4.242622264866118e-06, "loss": 0.9745, "step": 7290 }, { "epoch": 0.5620567375886525, "grad_norm": 3.5762779712677, "learning_rate": 4.241388247067128e-06, "loss": 0.899, "step": 7291 }, { "epoch": 0.5621338267036694, "grad_norm": 3.425102472305298, "learning_rate": 4.240154276563652e-06, "loss": 0.9087, "step": 7292 }, { "epoch": 0.5622109158186864, "grad_norm": 3.7708728313446045, "learning_rate": 4.238920353432623e-06, "loss": 0.9664, "step": 7293 }, { "epoch": 0.5622880049337033, "grad_norm": 3.508423089981079, "learning_rate": 4.237686477750966e-06, "loss": 0.9499, "step": 7294 }, { "epoch": 0.5623650940487204, "grad_norm": 3.62243390083313, "learning_rate": 4.23645264959561e-06, "loss": 0.9453, "step": 7295 }, { "epoch": 0.5624421831637373, "grad_norm": 3.7550387382507324, "learning_rate": 4.235218869043477e-06, "loss": 0.9756, "step": 7296 }, { "epoch": 0.5625192722787542, "grad_norm": 3.69486927986145, "learning_rate": 4.233985136171487e-06, "loss": 0.9725, "step": 7297 }, { "epoch": 0.5625963613937712, "grad_norm": 3.8165245056152344, "learning_rate": 4.232751451056555e-06, "loss": 0.917, "step": 7298 }, { "epoch": 0.5626734505087881, "grad_norm": 3.5180726051330566, 
"learning_rate": 4.231517813775597e-06, "loss": 0.8487, "step": 7299 }, { "epoch": 0.5627505396238052, "grad_norm": 3.76409912109375, "learning_rate": 4.230284224405523e-06, "loss": 0.9902, "step": 7300 }, { "epoch": 0.5628276287388221, "grad_norm": 3.4409983158111572, "learning_rate": 4.2290506830232415e-06, "loss": 0.8749, "step": 7301 }, { "epoch": 0.562904717853839, "grad_norm": 3.861907958984375, "learning_rate": 4.227817189705657e-06, "loss": 0.8812, "step": 7302 }, { "epoch": 0.562981806968856, "grad_norm": 3.839994192123413, "learning_rate": 4.226583744529672e-06, "loss": 0.954, "step": 7303 }, { "epoch": 0.5630588960838729, "grad_norm": 4.001551151275635, "learning_rate": 4.225350347572185e-06, "loss": 0.8083, "step": 7304 }, { "epoch": 0.56313598519889, "grad_norm": 3.7375106811523438, "learning_rate": 4.22411699891009e-06, "loss": 1.0402, "step": 7305 }, { "epoch": 0.5632130743139069, "grad_norm": 3.5613088607788086, "learning_rate": 4.2228836986202845e-06, "loss": 0.9585, "step": 7306 }, { "epoch": 0.5632901634289238, "grad_norm": 3.4407291412353516, "learning_rate": 4.221650446779653e-06, "loss": 0.9678, "step": 7307 }, { "epoch": 0.5633672525439408, "grad_norm": 3.501506805419922, "learning_rate": 4.2204172434650866e-06, "loss": 0.9946, "step": 7308 }, { "epoch": 0.5634443416589577, "grad_norm": 3.91636061668396, "learning_rate": 4.219184088753467e-06, "loss": 1.0566, "step": 7309 }, { "epoch": 0.5635214307739748, "grad_norm": 3.84093976020813, "learning_rate": 4.217950982721675e-06, "loss": 0.9006, "step": 7310 }, { "epoch": 0.5635985198889917, "grad_norm": 3.702842950820923, "learning_rate": 4.21671792544659e-06, "loss": 0.9995, "step": 7311 }, { "epoch": 0.5636756090040086, "grad_norm": 3.6409497261047363, "learning_rate": 4.215484917005085e-06, "loss": 0.9295, "step": 7312 }, { "epoch": 0.5637526981190256, "grad_norm": 3.425840377807617, "learning_rate": 4.2142519574740315e-06, "loss": 0.8669, "step": 7313 }, { "epoch": 0.5638297872340425, "grad_norm": 3.576141595840454, "learning_rate": 4.2130190469303005e-06, "loss": 0.9028, "step": 7314 }, { "epoch": 0.5639068763490596, "grad_norm": 3.4973714351654053, "learning_rate": 4.211786185450756e-06, "loss": 0.8018, "step": 7315 }, { "epoch": 0.5639839654640765, "grad_norm": 3.3027119636535645, "learning_rate": 4.210553373112259e-06, "loss": 0.8958, "step": 7316 }, { "epoch": 0.5640610545790934, "grad_norm": 4.032995223999023, "learning_rate": 4.209320609991672e-06, "loss": 0.9982, "step": 7317 }, { "epoch": 0.5641381436941104, "grad_norm": 3.718665838241577, "learning_rate": 4.208087896165849e-06, "loss": 0.9066, "step": 7318 }, { "epoch": 0.5642152328091273, "grad_norm": 3.670677900314331, "learning_rate": 4.206855231711645e-06, "loss": 1.0318, "step": 7319 }, { "epoch": 0.5642923219241444, "grad_norm": 3.2902212142944336, "learning_rate": 4.205622616705909e-06, "loss": 0.896, "step": 7320 }, { "epoch": 0.5643694110391613, "grad_norm": 3.813958168029785, "learning_rate": 4.204390051225488e-06, "loss": 1.0047, "step": 7321 }, { "epoch": 0.5644465001541782, "grad_norm": 3.3876993656158447, "learning_rate": 4.203157535347229e-06, "loss": 0.9258, "step": 7322 }, { "epoch": 0.5645235892691952, "grad_norm": 3.70788311958313, "learning_rate": 4.201925069147969e-06, "loss": 0.962, "step": 7323 }, { "epoch": 0.5646006783842121, "grad_norm": 3.678077459335327, "learning_rate": 4.200692652704545e-06, "loss": 1.0408, "step": 7324 }, { "epoch": 0.5646777674992292, "grad_norm": 3.8763816356658936, "learning_rate": 4.199460286093797e-06, 
"loss": 0.9782, "step": 7325 }, { "epoch": 0.5647548566142461, "grad_norm": 3.756704807281494, "learning_rate": 4.198227969392556e-06, "loss": 0.9667, "step": 7326 }, { "epoch": 0.564831945729263, "grad_norm": 3.946401596069336, "learning_rate": 4.196995702677646e-06, "loss": 0.9345, "step": 7327 }, { "epoch": 0.56490903484428, "grad_norm": 3.7415993213653564, "learning_rate": 4.195763486025896e-06, "loss": 0.9629, "step": 7328 }, { "epoch": 0.5649861239592969, "grad_norm": 3.9311485290527344, "learning_rate": 4.194531319514128e-06, "loss": 0.9507, "step": 7329 }, { "epoch": 0.565063213074314, "grad_norm": 3.924248456954956, "learning_rate": 4.1932992032191595e-06, "loss": 0.9141, "step": 7330 }, { "epoch": 0.5651403021893309, "grad_norm": 3.478628396987915, "learning_rate": 4.1920671372178075e-06, "loss": 1.0346, "step": 7331 }, { "epoch": 0.5652173913043478, "grad_norm": 3.4320709705352783, "learning_rate": 4.190835121586887e-06, "loss": 0.9594, "step": 7332 }, { "epoch": 0.5652944804193648, "grad_norm": 3.83650541305542, "learning_rate": 4.189603156403204e-06, "loss": 0.9973, "step": 7333 }, { "epoch": 0.5653715695343817, "grad_norm": 3.5375068187713623, "learning_rate": 4.188371241743568e-06, "loss": 0.9137, "step": 7334 }, { "epoch": 0.5654486586493987, "grad_norm": 4.036672592163086, "learning_rate": 4.187139377684781e-06, "loss": 1.0529, "step": 7335 }, { "epoch": 0.5655257477644157, "grad_norm": 3.483830213546753, "learning_rate": 4.185907564303644e-06, "loss": 0.8893, "step": 7336 }, { "epoch": 0.5656028368794326, "grad_norm": 3.592637300491333, "learning_rate": 4.184675801676955e-06, "loss": 1.0526, "step": 7337 }, { "epoch": 0.5656799259944496, "grad_norm": 3.425968647003174, "learning_rate": 4.183444089881506e-06, "loss": 0.9941, "step": 7338 }, { "epoch": 0.5657570151094665, "grad_norm": 3.730457305908203, "learning_rate": 4.182212428994088e-06, "loss": 0.9906, "step": 7339 }, { "epoch": 0.5658341042244835, "grad_norm": 3.9241623878479004, "learning_rate": 4.1809808190914925e-06, "loss": 0.9277, "step": 7340 }, { "epoch": 0.5659111933395005, "grad_norm": 3.7960057258605957, "learning_rate": 4.1797492602504985e-06, "loss": 0.8966, "step": 7341 }, { "epoch": 0.5659882824545174, "grad_norm": 3.747955083847046, "learning_rate": 4.178517752547891e-06, "loss": 1.0132, "step": 7342 }, { "epoch": 0.5660653715695344, "grad_norm": 3.649674654006958, "learning_rate": 4.177286296060448e-06, "loss": 0.8899, "step": 7343 }, { "epoch": 0.5661424606845513, "grad_norm": 3.396299123764038, "learning_rate": 4.176054890864942e-06, "loss": 0.9728, "step": 7344 }, { "epoch": 0.5662195497995683, "grad_norm": 3.8877921104431152, "learning_rate": 4.174823537038147e-06, "loss": 1.0016, "step": 7345 }, { "epoch": 0.5662966389145853, "grad_norm": 3.6778106689453125, "learning_rate": 4.173592234656831e-06, "loss": 0.9368, "step": 7346 }, { "epoch": 0.5663737280296022, "grad_norm": 3.6797902584075928, "learning_rate": 4.172360983797757e-06, "loss": 0.9625, "step": 7347 }, { "epoch": 0.5664508171446192, "grad_norm": 4.3902268409729, "learning_rate": 4.171129784537692e-06, "loss": 0.961, "step": 7348 }, { "epoch": 0.5665279062596361, "grad_norm": 3.7198588848114014, "learning_rate": 4.16989863695339e-06, "loss": 0.9612, "step": 7349 }, { "epoch": 0.5666049953746531, "grad_norm": 3.431483507156372, "learning_rate": 4.16866754112161e-06, "loss": 0.8565, "step": 7350 }, { "epoch": 0.5666820844896701, "grad_norm": 3.6897926330566406, "learning_rate": 4.167436497119103e-06, "loss": 0.9245, "step": 7351 }, { 
"epoch": 0.566759173604687, "grad_norm": 3.8588991165161133, "learning_rate": 4.166205505022618e-06, "loss": 1.04, "step": 7352 }, { "epoch": 0.566836262719704, "grad_norm": 3.7585813999176025, "learning_rate": 4.1649745649089015e-06, "loss": 0.8811, "step": 7353 }, { "epoch": 0.5669133518347209, "grad_norm": 3.642169713973999, "learning_rate": 4.163743676854697e-06, "loss": 0.8462, "step": 7354 }, { "epoch": 0.5669904409497379, "grad_norm": 3.2136752605438232, "learning_rate": 4.162512840936742e-06, "loss": 0.9009, "step": 7355 }, { "epoch": 0.5670675300647549, "grad_norm": 3.7039544582366943, "learning_rate": 4.161282057231776e-06, "loss": 0.9187, "step": 7356 }, { "epoch": 0.5671446191797718, "grad_norm": 3.5421409606933594, "learning_rate": 4.160051325816528e-06, "loss": 0.9465, "step": 7357 }, { "epoch": 0.5672217082947888, "grad_norm": 3.6218063831329346, "learning_rate": 4.158820646767729e-06, "loss": 1.0368, "step": 7358 }, { "epoch": 0.5672987974098057, "grad_norm": 3.5446629524230957, "learning_rate": 4.157590020162108e-06, "loss": 1.0172, "step": 7359 }, { "epoch": 0.5673758865248227, "grad_norm": 3.736348867416382, "learning_rate": 4.156359446076385e-06, "loss": 0.9261, "step": 7360 }, { "epoch": 0.5674529756398397, "grad_norm": 3.8830409049987793, "learning_rate": 4.1551289245872815e-06, "loss": 1.0677, "step": 7361 }, { "epoch": 0.5675300647548566, "grad_norm": 3.8011181354522705, "learning_rate": 4.153898455771514e-06, "loss": 0.9841, "step": 7362 }, { "epoch": 0.5676071538698736, "grad_norm": 3.9767227172851562, "learning_rate": 4.152668039705796e-06, "loss": 1.0165, "step": 7363 }, { "epoch": 0.5676842429848905, "grad_norm": 3.4617295265197754, "learning_rate": 4.151437676466836e-06, "loss": 0.9734, "step": 7364 }, { "epoch": 0.5677613320999075, "grad_norm": 3.2695651054382324, "learning_rate": 4.150207366131344e-06, "loss": 0.9571, "step": 7365 }, { "epoch": 0.5678384212149244, "grad_norm": 3.5925192832946777, "learning_rate": 4.14897710877602e-06, "loss": 0.9581, "step": 7366 }, { "epoch": 0.5679155103299414, "grad_norm": 3.435485363006592, "learning_rate": 4.147746904477567e-06, "loss": 0.9586, "step": 7367 }, { "epoch": 0.5679925994449584, "grad_norm": 3.665407657623291, "learning_rate": 4.146516753312677e-06, "loss": 1.023, "step": 7368 }, { "epoch": 0.5680696885599753, "grad_norm": 3.7188949584960938, "learning_rate": 4.145286655358051e-06, "loss": 0.9351, "step": 7369 }, { "epoch": 0.5681467776749923, "grad_norm": 4.338947296142578, "learning_rate": 4.144056610690375e-06, "loss": 0.9218, "step": 7370 }, { "epoch": 0.5682238667900092, "grad_norm": 4.026747703552246, "learning_rate": 4.142826619386334e-06, "loss": 1.0557, "step": 7371 }, { "epoch": 0.5683009559050262, "grad_norm": 3.3105926513671875, "learning_rate": 4.141596681522616e-06, "loss": 0.8553, "step": 7372 }, { "epoch": 0.5683780450200432, "grad_norm": 3.3676323890686035, "learning_rate": 4.140366797175899e-06, "loss": 0.8005, "step": 7373 }, { "epoch": 0.5684551341350601, "grad_norm": 3.641796588897705, "learning_rate": 4.13913696642286e-06, "loss": 0.8849, "step": 7374 }, { "epoch": 0.5685322232500771, "grad_norm": 3.88008975982666, "learning_rate": 4.137907189340172e-06, "loss": 0.9257, "step": 7375 }, { "epoch": 0.568609312365094, "grad_norm": 3.845065116882324, "learning_rate": 4.136677466004506e-06, "loss": 0.9235, "step": 7376 }, { "epoch": 0.568686401480111, "grad_norm": 3.5287108421325684, "learning_rate": 4.13544779649253e-06, "loss": 0.9357, "step": 7377 }, { "epoch": 0.568763490595128, 
"grad_norm": 3.5272388458251953, "learning_rate": 4.1342181808809046e-06, "loss": 0.9282, "step": 7378 }, { "epoch": 0.5688405797101449, "grad_norm": 4.141164779663086, "learning_rate": 4.132988619246291e-06, "loss": 0.8634, "step": 7379 }, { "epoch": 0.5689176688251619, "grad_norm": 3.388904571533203, "learning_rate": 4.131759111665349e-06, "loss": 0.9187, "step": 7380 }, { "epoch": 0.5689947579401788, "grad_norm": 3.592479944229126, "learning_rate": 4.130529658214728e-06, "loss": 0.9531, "step": 7381 }, { "epoch": 0.5690718470551958, "grad_norm": 3.7211825847625732, "learning_rate": 4.1293002589710795e-06, "loss": 1.0478, "step": 7382 }, { "epoch": 0.5691489361702128, "grad_norm": 3.6857645511627197, "learning_rate": 4.128070914011052e-06, "loss": 1.0107, "step": 7383 }, { "epoch": 0.5692260252852297, "grad_norm": 3.7964882850646973, "learning_rate": 4.126841623411286e-06, "loss": 1.0194, "step": 7384 }, { "epoch": 0.5693031144002467, "grad_norm": 3.608358860015869, "learning_rate": 4.125612387248423e-06, "loss": 0.8752, "step": 7385 }, { "epoch": 0.5693802035152636, "grad_norm": 3.4516360759735107, "learning_rate": 4.124383205599099e-06, "loss": 0.9234, "step": 7386 }, { "epoch": 0.5694572926302806, "grad_norm": 3.3848817348480225, "learning_rate": 4.123154078539946e-06, "loss": 0.8775, "step": 7387 }, { "epoch": 0.5695343817452976, "grad_norm": 3.8858206272125244, "learning_rate": 4.121925006147597e-06, "loss": 1.0166, "step": 7388 }, { "epoch": 0.5696114708603145, "grad_norm": 3.7127907276153564, "learning_rate": 4.120695988498674e-06, "loss": 0.9005, "step": 7389 }, { "epoch": 0.5696885599753315, "grad_norm": 3.9486067295074463, "learning_rate": 4.119467025669803e-06, "loss": 0.9782, "step": 7390 }, { "epoch": 0.5697656490903484, "grad_norm": 3.396491765975952, "learning_rate": 4.118238117737604e-06, "loss": 0.8566, "step": 7391 }, { "epoch": 0.5698427382053654, "grad_norm": 3.7514426708221436, "learning_rate": 4.1170092647786895e-06, "loss": 0.9061, "step": 7392 }, { "epoch": 0.5699198273203824, "grad_norm": 3.4897749423980713, "learning_rate": 4.115780466869676e-06, "loss": 0.9237, "step": 7393 }, { "epoch": 0.5699969164353993, "grad_norm": 3.4681146144866943, "learning_rate": 4.11455172408717e-06, "loss": 0.9261, "step": 7394 }, { "epoch": 0.5700740055504163, "grad_norm": 3.301769733428955, "learning_rate": 4.113323036507778e-06, "loss": 0.8158, "step": 7395 }, { "epoch": 0.5701510946654332, "grad_norm": 3.7004668712615967, "learning_rate": 4.112094404208102e-06, "loss": 0.8965, "step": 7396 }, { "epoch": 0.5702281837804501, "grad_norm": 3.7279725074768066, "learning_rate": 4.110865827264742e-06, "loss": 0.9718, "step": 7397 }, { "epoch": 0.5703052728954672, "grad_norm": 3.387369394302368, "learning_rate": 4.109637305754293e-06, "loss": 0.9405, "step": 7398 }, { "epoch": 0.5703823620104841, "grad_norm": 3.686414957046509, "learning_rate": 4.108408839753346e-06, "loss": 0.8854, "step": 7399 }, { "epoch": 0.5704594511255011, "grad_norm": 3.8577775955200195, "learning_rate": 4.107180429338491e-06, "loss": 0.9723, "step": 7400 }, { "epoch": 0.570536540240518, "grad_norm": 3.784442901611328, "learning_rate": 4.105952074586311e-06, "loss": 0.8432, "step": 7401 }, { "epoch": 0.570613629355535, "grad_norm": 4.02522611618042, "learning_rate": 4.10472377557339e-06, "loss": 0.9356, "step": 7402 }, { "epoch": 0.570690718470552, "grad_norm": 4.0557966232299805, "learning_rate": 4.103495532376304e-06, "loss": 0.9714, "step": 7403 }, { "epoch": 0.5707678075855689, "grad_norm": 4.144707679748535, 
"learning_rate": 4.1022673450716295e-06, "loss": 1.0333, "step": 7404 }, { "epoch": 0.5708448967005859, "grad_norm": 3.7137889862060547, "learning_rate": 4.101039213735935e-06, "loss": 1.0662, "step": 7405 }, { "epoch": 0.5709219858156028, "grad_norm": 3.3066835403442383, "learning_rate": 4.09981113844579e-06, "loss": 0.7372, "step": 7406 }, { "epoch": 0.5709990749306197, "grad_norm": 3.355921983718872, "learning_rate": 4.098583119277759e-06, "loss": 0.9921, "step": 7407 }, { "epoch": 0.5710761640456368, "grad_norm": 3.732572555541992, "learning_rate": 4.097355156308402e-06, "loss": 0.9918, "step": 7408 }, { "epoch": 0.5711532531606537, "grad_norm": 3.7717831134796143, "learning_rate": 4.096127249614276e-06, "loss": 0.8853, "step": 7409 }, { "epoch": 0.5712303422756707, "grad_norm": 3.5494415760040283, "learning_rate": 4.094899399271935e-06, "loss": 1.0293, "step": 7410 }, { "epoch": 0.5713074313906876, "grad_norm": 3.6082026958465576, "learning_rate": 4.093671605357928e-06, "loss": 0.9877, "step": 7411 }, { "epoch": 0.5713845205057045, "grad_norm": 3.492135524749756, "learning_rate": 4.092443867948801e-06, "loss": 0.8388, "step": 7412 }, { "epoch": 0.5714616096207216, "grad_norm": 3.4625587463378906, "learning_rate": 4.0912161871211e-06, "loss": 0.8672, "step": 7413 }, { "epoch": 0.5715386987357385, "grad_norm": 3.4896044731140137, "learning_rate": 4.089988562951363e-06, "loss": 0.933, "step": 7414 }, { "epoch": 0.5716157878507555, "grad_norm": 3.792921781539917, "learning_rate": 4.088760995516127e-06, "loss": 0.9755, "step": 7415 }, { "epoch": 0.5716928769657724, "grad_norm": 4.106998443603516, "learning_rate": 4.0875334848919225e-06, "loss": 0.9181, "step": 7416 }, { "epoch": 0.5717699660807893, "grad_norm": 3.3308520317077637, "learning_rate": 4.08630603115528e-06, "loss": 0.9823, "step": 7417 }, { "epoch": 0.5718470551958064, "grad_norm": 3.626404285430908, "learning_rate": 4.085078634382724e-06, "loss": 0.924, "step": 7418 }, { "epoch": 0.5719241443108233, "grad_norm": 3.646789312362671, "learning_rate": 4.083851294650776e-06, "loss": 0.9783, "step": 7419 }, { "epoch": 0.5720012334258403, "grad_norm": 3.3674442768096924, "learning_rate": 4.0826240120359565e-06, "loss": 0.8714, "step": 7420 }, { "epoch": 0.5720783225408572, "grad_norm": 3.333601474761963, "learning_rate": 4.081396786614777e-06, "loss": 0.9889, "step": 7421 }, { "epoch": 0.5721554116558741, "grad_norm": 3.5855748653411865, "learning_rate": 4.080169618463751e-06, "loss": 0.8852, "step": 7422 }, { "epoch": 0.5722325007708912, "grad_norm": 3.490710496902466, "learning_rate": 4.078942507659386e-06, "loss": 0.9468, "step": 7423 }, { "epoch": 0.5723095898859081, "grad_norm": 3.4587180614471436, "learning_rate": 4.0777154542781846e-06, "loss": 0.9592, "step": 7424 }, { "epoch": 0.5723866790009251, "grad_norm": 3.637395143508911, "learning_rate": 4.076488458396649e-06, "loss": 0.9181, "step": 7425 }, { "epoch": 0.572463768115942, "grad_norm": 3.4306821823120117, "learning_rate": 4.075261520091273e-06, "loss": 0.9677, "step": 7426 }, { "epoch": 0.572540857230959, "grad_norm": 3.6101748943328857, "learning_rate": 4.074034639438553e-06, "loss": 0.9951, "step": 7427 }, { "epoch": 0.572617946345976, "grad_norm": 3.5071911811828613, "learning_rate": 4.072807816514978e-06, "loss": 0.878, "step": 7428 }, { "epoch": 0.5726950354609929, "grad_norm": 3.8580219745635986, "learning_rate": 4.071581051397033e-06, "loss": 0.8271, "step": 7429 }, { "epoch": 0.5727721245760099, "grad_norm": 3.526630401611328, "learning_rate": 
4.070354344161201e-06, "loss": 0.9959, "step": 7430 }, { "epoch": 0.5728492136910268, "grad_norm": 3.5120689868927, "learning_rate": 4.069127694883962e-06, "loss": 0.8585, "step": 7431 }, { "epoch": 0.5729263028060438, "grad_norm": 3.920745611190796, "learning_rate": 4.067901103641788e-06, "loss": 0.9903, "step": 7432 }, { "epoch": 0.5730033919210608, "grad_norm": 3.5974879264831543, "learning_rate": 4.066674570511156e-06, "loss": 0.9851, "step": 7433 }, { "epoch": 0.5730804810360777, "grad_norm": 3.3706092834472656, "learning_rate": 4.065448095568527e-06, "loss": 0.9094, "step": 7434 }, { "epoch": 0.5731575701510947, "grad_norm": 3.542107582092285, "learning_rate": 4.064221678890371e-06, "loss": 0.9867, "step": 7435 }, { "epoch": 0.5732346592661116, "grad_norm": 3.5891056060791016, "learning_rate": 4.062995320553147e-06, "loss": 0.8284, "step": 7436 }, { "epoch": 0.5733117483811286, "grad_norm": 3.375009059906006, "learning_rate": 4.061769020633311e-06, "loss": 0.86, "step": 7437 }, { "epoch": 0.5733888374961456, "grad_norm": 3.44054913520813, "learning_rate": 4.060542779207317e-06, "loss": 0.9128, "step": 7438 }, { "epoch": 0.5734659266111625, "grad_norm": 3.8439524173736572, "learning_rate": 4.059316596351617e-06, "loss": 1.0774, "step": 7439 }, { "epoch": 0.5735430157261795, "grad_norm": 4.025179862976074, "learning_rate": 4.058090472142654e-06, "loss": 1.0527, "step": 7440 }, { "epoch": 0.5736201048411964, "grad_norm": 3.771923780441284, "learning_rate": 4.056864406656872e-06, "loss": 0.9199, "step": 7441 }, { "epoch": 0.5736971939562134, "grad_norm": 3.6226768493652344, "learning_rate": 4.05563839997071e-06, "loss": 0.8523, "step": 7442 }, { "epoch": 0.5737742830712304, "grad_norm": 3.7793591022491455, "learning_rate": 4.054412452160601e-06, "loss": 0.9109, "step": 7443 }, { "epoch": 0.5738513721862473, "grad_norm": 3.322064161300659, "learning_rate": 4.053186563302981e-06, "loss": 0.8126, "step": 7444 }, { "epoch": 0.5739284613012643, "grad_norm": 3.742952585220337, "learning_rate": 4.051960733474273e-06, "loss": 1.0401, "step": 7445 }, { "epoch": 0.5740055504162812, "grad_norm": 3.36472487449646, "learning_rate": 4.0507349627509045e-06, "loss": 0.7989, "step": 7446 }, { "epoch": 0.5740826395312982, "grad_norm": 3.582036018371582, "learning_rate": 4.049509251209295e-06, "loss": 0.9565, "step": 7447 }, { "epoch": 0.5741597286463151, "grad_norm": 3.877749443054199, "learning_rate": 4.048283598925859e-06, "loss": 0.8715, "step": 7448 }, { "epoch": 0.5742368177613321, "grad_norm": 3.445472002029419, "learning_rate": 4.0470580059770125e-06, "loss": 0.9293, "step": 7449 }, { "epoch": 0.5743139068763491, "grad_norm": 3.6132113933563232, "learning_rate": 4.045832472439165e-06, "loss": 1.0043, "step": 7450 }, { "epoch": 0.574390995991366, "grad_norm": 3.4551825523376465, "learning_rate": 4.04460699838872e-06, "loss": 0.8898, "step": 7451 }, { "epoch": 0.574468085106383, "grad_norm": 3.322658061981201, "learning_rate": 4.0433815839020815e-06, "loss": 0.8621, "step": 7452 }, { "epoch": 0.5745451742214, "grad_norm": 3.8116767406463623, "learning_rate": 4.042156229055645e-06, "loss": 0.9973, "step": 7453 }, { "epoch": 0.5746222633364169, "grad_norm": 3.5855953693389893, "learning_rate": 4.040930933925808e-06, "loss": 1.0915, "step": 7454 }, { "epoch": 0.5746993524514339, "grad_norm": 3.5874369144439697, "learning_rate": 4.039705698588961e-06, "loss": 1.0041, "step": 7455 }, { "epoch": 0.5747764415664508, "grad_norm": 4.597201824188232, "learning_rate": 4.038480523121488e-06, "loss": 0.851, 
"step": 7456 }, { "epoch": 0.5748535306814678, "grad_norm": 3.7259809970855713, "learning_rate": 4.037255407599775e-06, "loss": 0.9158, "step": 7457 }, { "epoch": 0.5749306197964847, "grad_norm": 3.8748040199279785, "learning_rate": 4.0360303521002014e-06, "loss": 0.9733, "step": 7458 }, { "epoch": 0.5750077089115017, "grad_norm": 3.888749599456787, "learning_rate": 4.034805356699144e-06, "loss": 1.0054, "step": 7459 }, { "epoch": 0.5750847980265187, "grad_norm": 3.4167816638946533, "learning_rate": 4.033580421472973e-06, "loss": 0.8625, "step": 7460 }, { "epoch": 0.5751618871415356, "grad_norm": 4.041205406188965, "learning_rate": 4.032355546498057e-06, "loss": 0.9047, "step": 7461 }, { "epoch": 0.5752389762565526, "grad_norm": 4.094997882843018, "learning_rate": 4.031130731850762e-06, "loss": 0.9403, "step": 7462 }, { "epoch": 0.5753160653715695, "grad_norm": 3.3104913234710693, "learning_rate": 4.029905977607448e-06, "loss": 0.8705, "step": 7463 }, { "epoch": 0.5753931544865865, "grad_norm": 3.7642409801483154, "learning_rate": 4.028681283844471e-06, "loss": 0.961, "step": 7464 }, { "epoch": 0.5754702436016035, "grad_norm": 3.346750497817993, "learning_rate": 4.027456650638187e-06, "loss": 0.8146, "step": 7465 }, { "epoch": 0.5755473327166204, "grad_norm": 3.714411735534668, "learning_rate": 4.026232078064942e-06, "loss": 1.0303, "step": 7466 }, { "epoch": 0.5756244218316374, "grad_norm": 3.7148983478546143, "learning_rate": 4.025007566201085e-06, "loss": 0.8392, "step": 7467 }, { "epoch": 0.5757015109466543, "grad_norm": 3.490588426589966, "learning_rate": 4.023783115122956e-06, "loss": 0.9568, "step": 7468 }, { "epoch": 0.5757786000616713, "grad_norm": 3.651613235473633, "learning_rate": 4.0225587249068945e-06, "loss": 0.972, "step": 7469 }, { "epoch": 0.5758556891766883, "grad_norm": 3.942833185195923, "learning_rate": 4.021334395629234e-06, "loss": 1.0118, "step": 7470 }, { "epoch": 0.5759327782917052, "grad_norm": 3.5396273136138916, "learning_rate": 4.020110127366304e-06, "loss": 0.8425, "step": 7471 }, { "epoch": 0.5760098674067222, "grad_norm": 3.949531078338623, "learning_rate": 4.018885920194434e-06, "loss": 0.9779, "step": 7472 }, { "epoch": 0.5760869565217391, "grad_norm": 4.047253131866455, "learning_rate": 4.0176617741899456e-06, "loss": 1.018, "step": 7473 }, { "epoch": 0.576164045636756, "grad_norm": 4.037176609039307, "learning_rate": 4.016437689429158e-06, "loss": 0.949, "step": 7474 }, { "epoch": 0.5762411347517731, "grad_norm": 3.5076513290405273, "learning_rate": 4.015213665988385e-06, "loss": 0.9106, "step": 7475 }, { "epoch": 0.57631822386679, "grad_norm": 3.6117470264434814, "learning_rate": 4.013989703943942e-06, "loss": 0.9441, "step": 7476 }, { "epoch": 0.576395312981807, "grad_norm": 3.7086315155029297, "learning_rate": 4.012765803372132e-06, "loss": 0.9981, "step": 7477 }, { "epoch": 0.5764724020968239, "grad_norm": 3.609908103942871, "learning_rate": 4.011541964349261e-06, "loss": 0.9198, "step": 7478 }, { "epoch": 0.5765494912118408, "grad_norm": 3.6467490196228027, "learning_rate": 4.010318186951631e-06, "loss": 0.9531, "step": 7479 }, { "epoch": 0.5766265803268579, "grad_norm": 3.7244629859924316, "learning_rate": 4.009094471255536e-06, "loss": 0.9511, "step": 7480 }, { "epoch": 0.5767036694418748, "grad_norm": 4.267176628112793, "learning_rate": 4.007870817337268e-06, "loss": 0.9585, "step": 7481 }, { "epoch": 0.5767807585568918, "grad_norm": 4.043630599975586, "learning_rate": 4.006647225273116e-06, "loss": 0.9615, "step": 7482 }, { "epoch": 
0.5768578476719087, "grad_norm": 3.869731903076172, "learning_rate": 4.005423695139366e-06, "loss": 0.9452, "step": 7483 }, { "epoch": 0.5769349367869256, "grad_norm": 3.4697086811065674, "learning_rate": 4.004200227012296e-06, "loss": 0.7982, "step": 7484 }, { "epoch": 0.5770120259019427, "grad_norm": 3.798818826675415, "learning_rate": 4.002976820968186e-06, "loss": 0.9925, "step": 7485 }, { "epoch": 0.5770891150169596, "grad_norm": 3.5721566677093506, "learning_rate": 4.001753477083306e-06, "loss": 0.8914, "step": 7486 }, { "epoch": 0.5771662041319766, "grad_norm": 3.3828439712524414, "learning_rate": 4.000530195433928e-06, "loss": 0.8027, "step": 7487 }, { "epoch": 0.5772432932469935, "grad_norm": 3.4918408393859863, "learning_rate": 3.999306976096315e-06, "loss": 0.8907, "step": 7488 }, { "epoch": 0.5773203823620104, "grad_norm": 3.524773359298706, "learning_rate": 3.9980838191467296e-06, "loss": 0.9068, "step": 7489 }, { "epoch": 0.5773974714770275, "grad_norm": 3.529926300048828, "learning_rate": 3.996860724661429e-06, "loss": 0.9297, "step": 7490 }, { "epoch": 0.5774745605920444, "grad_norm": 3.6954150199890137, "learning_rate": 3.995637692716666e-06, "loss": 0.9739, "step": 7491 }, { "epoch": 0.5775516497070614, "grad_norm": 3.9414443969726562, "learning_rate": 3.994414723388693e-06, "loss": 1.0595, "step": 7492 }, { "epoch": 0.5776287388220783, "grad_norm": 3.7672884464263916, "learning_rate": 3.993191816753753e-06, "loss": 0.9773, "step": 7493 }, { "epoch": 0.5777058279370952, "grad_norm": 3.520125389099121, "learning_rate": 3.991968972888088e-06, "loss": 0.8697, "step": 7494 }, { "epoch": 0.5777829170521123, "grad_norm": 3.5860490798950195, "learning_rate": 3.990746191867938e-06, "loss": 0.9923, "step": 7495 }, { "epoch": 0.5778600061671292, "grad_norm": 3.428400993347168, "learning_rate": 3.989523473769536e-06, "loss": 0.7466, "step": 7496 }, { "epoch": 0.5779370952821462, "grad_norm": 3.6904683113098145, "learning_rate": 3.98830081866911e-06, "loss": 1.0291, "step": 7497 }, { "epoch": 0.5780141843971631, "grad_norm": 3.687467336654663, "learning_rate": 3.987078226642891e-06, "loss": 0.8533, "step": 7498 }, { "epoch": 0.57809127351218, "grad_norm": 3.687129020690918, "learning_rate": 3.985855697767097e-06, "loss": 0.9885, "step": 7499 }, { "epoch": 0.5781683626271971, "grad_norm": 3.5919816493988037, "learning_rate": 3.984633232117948e-06, "loss": 1.0096, "step": 7500 }, { "epoch": 0.578245451742214, "grad_norm": 3.3104560375213623, "learning_rate": 3.983410829771656e-06, "loss": 0.9715, "step": 7501 }, { "epoch": 0.578322540857231, "grad_norm": 3.539083957672119, "learning_rate": 3.982188490804437e-06, "loss": 0.8584, "step": 7502 }, { "epoch": 0.5783996299722479, "grad_norm": 3.4675793647766113, "learning_rate": 3.980966215292493e-06, "loss": 0.903, "step": 7503 }, { "epoch": 0.5784767190872648, "grad_norm": 3.8223822116851807, "learning_rate": 3.979744003312027e-06, "loss": 0.9112, "step": 7504 }, { "epoch": 0.5785538082022819, "grad_norm": 3.458359956741333, "learning_rate": 3.97852185493924e-06, "loss": 1.0016, "step": 7505 }, { "epoch": 0.5786308973172988, "grad_norm": 3.756317138671875, "learning_rate": 3.977299770250324e-06, "loss": 0.9623, "step": 7506 }, { "epoch": 0.5787079864323158, "grad_norm": 3.434952735900879, "learning_rate": 3.976077749321472e-06, "loss": 0.8595, "step": 7507 }, { "epoch": 0.5787850755473327, "grad_norm": 3.5256009101867676, "learning_rate": 3.974855792228868e-06, "loss": 0.9876, "step": 7508 }, { "epoch": 0.5788621646623496, "grad_norm": 
3.5869874954223633, "learning_rate": 3.973633899048696e-06, "loss": 0.9544, "step": 7509 }, { "epoch": 0.5789392537773667, "grad_norm": 3.856430768966675, "learning_rate": 3.9724120698571354e-06, "loss": 1.058, "step": 7510 }, { "epoch": 0.5790163428923836, "grad_norm": 3.683361053466797, "learning_rate": 3.971190304730359e-06, "loss": 0.9864, "step": 7511 }, { "epoch": 0.5790934320074006, "grad_norm": 3.582198143005371, "learning_rate": 3.969968603744539e-06, "loss": 0.8897, "step": 7512 }, { "epoch": 0.5791705211224175, "grad_norm": 3.6801340579986572, "learning_rate": 3.968746966975844e-06, "loss": 0.9485, "step": 7513 }, { "epoch": 0.5792476102374344, "grad_norm": 3.6165666580200195, "learning_rate": 3.967525394500432e-06, "loss": 0.9006, "step": 7514 }, { "epoch": 0.5793246993524515, "grad_norm": 3.775045394897461, "learning_rate": 3.966303886394465e-06, "loss": 0.9171, "step": 7515 }, { "epoch": 0.5794017884674684, "grad_norm": 3.5905048847198486, "learning_rate": 3.965082442734098e-06, "loss": 0.8586, "step": 7516 }, { "epoch": 0.5794788775824854, "grad_norm": 3.667304277420044, "learning_rate": 3.96386106359548e-06, "loss": 0.9203, "step": 7517 }, { "epoch": 0.5795559666975023, "grad_norm": 3.824692726135254, "learning_rate": 3.96263974905476e-06, "loss": 0.8806, "step": 7518 }, { "epoch": 0.5796330558125192, "grad_norm": 3.4772675037384033, "learning_rate": 3.961418499188076e-06, "loss": 0.8067, "step": 7519 }, { "epoch": 0.5797101449275363, "grad_norm": 3.65657377243042, "learning_rate": 3.960197314071571e-06, "loss": 1.0266, "step": 7520 }, { "epoch": 0.5797872340425532, "grad_norm": 3.3273978233337402, "learning_rate": 3.9589761937813795e-06, "loss": 0.82, "step": 7521 }, { "epoch": 0.5798643231575702, "grad_norm": 3.5966200828552246, "learning_rate": 3.957755138393629e-06, "loss": 0.8728, "step": 7522 }, { "epoch": 0.5799414122725871, "grad_norm": 3.8184077739715576, "learning_rate": 3.9565341479844476e-06, "loss": 1.0214, "step": 7523 }, { "epoch": 0.580018501387604, "grad_norm": 3.5819761753082275, "learning_rate": 3.9553132226299595e-06, "loss": 0.8721, "step": 7524 }, { "epoch": 0.580095590502621, "grad_norm": 3.768620491027832, "learning_rate": 3.95409236240628e-06, "loss": 0.905, "step": 7525 }, { "epoch": 0.580172679617638, "grad_norm": 3.6173248291015625, "learning_rate": 3.952871567389525e-06, "loss": 0.8692, "step": 7526 }, { "epoch": 0.580249768732655, "grad_norm": 3.3437893390655518, "learning_rate": 3.951650837655805e-06, "loss": 0.982, "step": 7527 }, { "epoch": 0.5803268578476719, "grad_norm": 3.5120673179626465, "learning_rate": 3.9504301732812255e-06, "loss": 0.9568, "step": 7528 }, { "epoch": 0.5804039469626888, "grad_norm": 3.586928606033325, "learning_rate": 3.949209574341889e-06, "loss": 0.9897, "step": 7529 }, { "epoch": 0.5804810360777058, "grad_norm": 3.88667631149292, "learning_rate": 3.947989040913893e-06, "loss": 1.0405, "step": 7530 }, { "epoch": 0.5805581251927228, "grad_norm": 3.989518165588379, "learning_rate": 3.946768573073332e-06, "loss": 0.9536, "step": 7531 }, { "epoch": 0.5806352143077398, "grad_norm": 3.595759630203247, "learning_rate": 3.945548170896296e-06, "loss": 0.9909, "step": 7532 }, { "epoch": 0.5807123034227567, "grad_norm": 3.8122918605804443, "learning_rate": 3.9443278344588696e-06, "loss": 0.9124, "step": 7533 }, { "epoch": 0.5807893925377736, "grad_norm": 3.4445278644561768, "learning_rate": 3.943107563837135e-06, "loss": 0.862, "step": 7534 }, { "epoch": 0.5808664816527906, "grad_norm": 3.818312168121338, "learning_rate": 
3.941887359107172e-06, "loss": 0.9938, "step": 7535 }, { "epoch": 0.5809435707678076, "grad_norm": 3.6092066764831543, "learning_rate": 3.94066722034505e-06, "loss": 0.9801, "step": 7536 }, { "epoch": 0.5810206598828246, "grad_norm": 3.58729887008667, "learning_rate": 3.939447147626842e-06, "loss": 0.8662, "step": 7537 }, { "epoch": 0.5810977489978415, "grad_norm": 4.0023298263549805, "learning_rate": 3.93822714102861e-06, "loss": 0.929, "step": 7538 }, { "epoch": 0.5811748381128584, "grad_norm": 3.671865940093994, "learning_rate": 3.937007200626417e-06, "loss": 1.0063, "step": 7539 }, { "epoch": 0.5812519272278754, "grad_norm": 3.9772789478302, "learning_rate": 3.935787326496321e-06, "loss": 1.1299, "step": 7540 }, { "epoch": 0.5813290163428924, "grad_norm": 3.3404088020324707, "learning_rate": 3.934567518714372e-06, "loss": 0.9902, "step": 7541 }, { "epoch": 0.5814061054579094, "grad_norm": 3.876286506652832, "learning_rate": 3.9333477773566204e-06, "loss": 0.9492, "step": 7542 }, { "epoch": 0.5814831945729263, "grad_norm": 3.982120990753174, "learning_rate": 3.932128102499111e-06, "loss": 1.0037, "step": 7543 }, { "epoch": 0.5815602836879432, "grad_norm": 4.225841999053955, "learning_rate": 3.930908494217884e-06, "loss": 0.8912, "step": 7544 }, { "epoch": 0.5816373728029602, "grad_norm": 3.5947415828704834, "learning_rate": 3.929688952588974e-06, "loss": 1.0022, "step": 7545 }, { "epoch": 0.5817144619179772, "grad_norm": 3.5156421661376953, "learning_rate": 3.928469477688415e-06, "loss": 0.8517, "step": 7546 }, { "epoch": 0.5817915510329942, "grad_norm": 3.9697585105895996, "learning_rate": 3.927250069592236e-06, "loss": 0.868, "step": 7547 }, { "epoch": 0.5818686401480111, "grad_norm": 3.4268386363983154, "learning_rate": 3.926030728376458e-06, "loss": 0.8795, "step": 7548 }, { "epoch": 0.581945729263028, "grad_norm": 3.7398383617401123, "learning_rate": 3.924811454117101e-06, "loss": 0.963, "step": 7549 }, { "epoch": 0.582022818378045, "grad_norm": 3.5979716777801514, "learning_rate": 3.923592246890183e-06, "loss": 0.9222, "step": 7550 }, { "epoch": 0.582099907493062, "grad_norm": 3.304446220397949, "learning_rate": 3.922373106771713e-06, "loss": 0.895, "step": 7551 }, { "epoch": 0.582176996608079, "grad_norm": 3.7752010822296143, "learning_rate": 3.921154033837699e-06, "loss": 1.0166, "step": 7552 }, { "epoch": 0.5822540857230959, "grad_norm": 3.516181707382202, "learning_rate": 3.919935028164143e-06, "loss": 0.9529, "step": 7553 }, { "epoch": 0.5823311748381128, "grad_norm": 3.4951820373535156, "learning_rate": 3.9187160898270435e-06, "loss": 0.9276, "step": 7554 }, { "epoch": 0.5824082639531298, "grad_norm": 3.369300365447998, "learning_rate": 3.917497218902398e-06, "loss": 0.9608, "step": 7555 }, { "epoch": 0.5824853530681467, "grad_norm": 3.4424049854278564, "learning_rate": 3.916278415466193e-06, "loss": 0.9437, "step": 7556 }, { "epoch": 0.5825624421831638, "grad_norm": 3.40697979927063, "learning_rate": 3.9150596795944155e-06, "loss": 0.9566, "step": 7557 }, { "epoch": 0.5826395312981807, "grad_norm": 3.608034133911133, "learning_rate": 3.913841011363049e-06, "loss": 0.9437, "step": 7558 }, { "epoch": 0.5827166204131976, "grad_norm": 3.5082850456237793, "learning_rate": 3.912622410848069e-06, "loss": 0.8977, "step": 7559 }, { "epoch": 0.5827937095282146, "grad_norm": 3.802832841873169, "learning_rate": 3.91140387812545e-06, "loss": 0.9256, "step": 7560 }, { "epoch": 0.5828707986432315, "grad_norm": 3.8759782314300537, "learning_rate": 3.910185413271162e-06, "loss": 0.8519, 
"step": 7561 }, { "epoch": 0.5829478877582486, "grad_norm": 3.485323429107666, "learning_rate": 3.908967016361169e-06, "loss": 0.9075, "step": 7562 }, { "epoch": 0.5830249768732655, "grad_norm": 3.791802167892456, "learning_rate": 3.907748687471431e-06, "loss": 0.9889, "step": 7563 }, { "epoch": 0.5831020659882824, "grad_norm": 3.474576950073242, "learning_rate": 3.906530426677907e-06, "loss": 0.8918, "step": 7564 }, { "epoch": 0.5831791551032994, "grad_norm": 3.3780479431152344, "learning_rate": 3.905312234056547e-06, "loss": 0.8903, "step": 7565 }, { "epoch": 0.5832562442183163, "grad_norm": 3.6616179943084717, "learning_rate": 3.904094109683301e-06, "loss": 0.9756, "step": 7566 }, { "epoch": 0.5833333333333334, "grad_norm": 3.564548969268799, "learning_rate": 3.902876053634109e-06, "loss": 0.8836, "step": 7567 }, { "epoch": 0.5834104224483503, "grad_norm": 3.944483518600464, "learning_rate": 3.901658065984913e-06, "loss": 0.9277, "step": 7568 }, { "epoch": 0.5834875115633672, "grad_norm": 3.700523614883423, "learning_rate": 3.90044014681165e-06, "loss": 1.0303, "step": 7569 }, { "epoch": 0.5835646006783842, "grad_norm": 3.6363394260406494, "learning_rate": 3.899222296190248e-06, "loss": 0.8375, "step": 7570 }, { "epoch": 0.5836416897934011, "grad_norm": 3.8112072944641113, "learning_rate": 3.898004514196635e-06, "loss": 0.9404, "step": 7571 }, { "epoch": 0.5837187789084182, "grad_norm": 3.8142549991607666, "learning_rate": 3.896786800906734e-06, "loss": 0.908, "step": 7572 }, { "epoch": 0.5837958680234351, "grad_norm": 3.6068294048309326, "learning_rate": 3.8955691563964605e-06, "loss": 0.8951, "step": 7573 }, { "epoch": 0.583872957138452, "grad_norm": 3.8355910778045654, "learning_rate": 3.894351580741731e-06, "loss": 0.8517, "step": 7574 }, { "epoch": 0.583950046253469, "grad_norm": 3.838613986968994, "learning_rate": 3.893134074018454e-06, "loss": 1.0072, "step": 7575 }, { "epoch": 0.5840271353684859, "grad_norm": 3.381627321243286, "learning_rate": 3.891916636302535e-06, "loss": 0.9339, "step": 7576 }, { "epoch": 0.584104224483503, "grad_norm": 3.5409560203552246, "learning_rate": 3.890699267669876e-06, "loss": 0.8285, "step": 7577 }, { "epoch": 0.5841813135985199, "grad_norm": 3.678004741668701, "learning_rate": 3.889481968196371e-06, "loss": 0.9596, "step": 7578 }, { "epoch": 0.5842584027135368, "grad_norm": 4.06223726272583, "learning_rate": 3.888264737957913e-06, "loss": 1.0516, "step": 7579 }, { "epoch": 0.5843354918285538, "grad_norm": 3.5083158016204834, "learning_rate": 3.887047577030393e-06, "loss": 0.8042, "step": 7580 }, { "epoch": 0.5844125809435707, "grad_norm": 3.717420816421509, "learning_rate": 3.8858304854896906e-06, "loss": 1.0194, "step": 7581 }, { "epoch": 0.5844896700585878, "grad_norm": 3.8681986331939697, "learning_rate": 3.884613463411687e-06, "loss": 0.9029, "step": 7582 }, { "epoch": 0.5845667591736047, "grad_norm": 3.5388240814208984, "learning_rate": 3.883396510872259e-06, "loss": 0.9348, "step": 7583 }, { "epoch": 0.5846438482886216, "grad_norm": 3.749166488647461, "learning_rate": 3.882179627947273e-06, "loss": 0.9825, "step": 7584 }, { "epoch": 0.5847209374036386, "grad_norm": 3.7191250324249268, "learning_rate": 3.8809628147126e-06, "loss": 0.9845, "step": 7585 }, { "epoch": 0.5847980265186555, "grad_norm": 3.772263526916504, "learning_rate": 3.879746071244099e-06, "loss": 0.8341, "step": 7586 }, { "epoch": 0.5848751156336726, "grad_norm": 3.8493549823760986, "learning_rate": 3.8785293976176285e-06, "loss": 1.0723, "step": 7587 }, { "epoch": 
0.5849522047486895, "grad_norm": 3.944638967514038, "learning_rate": 3.877312793909043e-06, "loss": 1.0129, "step": 7588 }, { "epoch": 0.5850292938637064, "grad_norm": 3.4591333866119385, "learning_rate": 3.876096260194188e-06, "loss": 1.0135, "step": 7589 }, { "epoch": 0.5851063829787234, "grad_norm": 3.324209451675415, "learning_rate": 3.87487979654891e-06, "loss": 0.8952, "step": 7590 }, { "epoch": 0.5851834720937403, "grad_norm": 3.500072956085205, "learning_rate": 3.873663403049051e-06, "loss": 0.8838, "step": 7591 }, { "epoch": 0.5852605612087574, "grad_norm": 3.5885183811187744, "learning_rate": 3.872447079770446e-06, "loss": 0.9321, "step": 7592 }, { "epoch": 0.5853376503237743, "grad_norm": 3.4161670207977295, "learning_rate": 3.871230826788925e-06, "loss": 0.9088, "step": 7593 }, { "epoch": 0.5854147394387912, "grad_norm": 3.7749154567718506, "learning_rate": 3.870014644180315e-06, "loss": 0.9423, "step": 7594 }, { "epoch": 0.5854918285538082, "grad_norm": 3.5398154258728027, "learning_rate": 3.868798532020442e-06, "loss": 0.932, "step": 7595 }, { "epoch": 0.5855689176688251, "grad_norm": 3.844788074493408, "learning_rate": 3.86758249038512e-06, "loss": 0.9811, "step": 7596 }, { "epoch": 0.5856460067838422, "grad_norm": 3.943168878555298, "learning_rate": 3.866366519350165e-06, "loss": 0.9965, "step": 7597 }, { "epoch": 0.5857230958988591, "grad_norm": 3.5133891105651855, "learning_rate": 3.865150618991388e-06, "loss": 0.9619, "step": 7598 }, { "epoch": 0.5858001850138761, "grad_norm": 3.773076295852661, "learning_rate": 3.8639347893845905e-06, "loss": 0.8272, "step": 7599 }, { "epoch": 0.585877274128893, "grad_norm": 3.4386439323425293, "learning_rate": 3.862719030605575e-06, "loss": 0.921, "step": 7600 }, { "epoch": 0.5859543632439099, "grad_norm": 3.826698064804077, "learning_rate": 3.861503342730139e-06, "loss": 0.8279, "step": 7601 }, { "epoch": 0.586031452358927, "grad_norm": 3.6429896354675293, "learning_rate": 3.860287725834072e-06, "loss": 0.8615, "step": 7602 }, { "epoch": 0.5861085414739439, "grad_norm": 3.78666353225708, "learning_rate": 3.859072179993164e-06, "loss": 0.9863, "step": 7603 }, { "epoch": 0.5861856305889609, "grad_norm": 3.275872230529785, "learning_rate": 3.857856705283195e-06, "loss": 0.7179, "step": 7604 }, { "epoch": 0.5862627197039778, "grad_norm": 3.5281994342803955, "learning_rate": 3.856641301779946e-06, "loss": 0.9711, "step": 7605 }, { "epoch": 0.5863398088189947, "grad_norm": 3.61147141456604, "learning_rate": 3.855425969559191e-06, "loss": 0.9409, "step": 7606 }, { "epoch": 0.5864168979340117, "grad_norm": 3.7258195877075195, "learning_rate": 3.854210708696697e-06, "loss": 0.8085, "step": 7607 }, { "epoch": 0.5864939870490287, "grad_norm": 3.8481357097625732, "learning_rate": 3.852995519268231e-06, "loss": 0.9489, "step": 7608 }, { "epoch": 0.5865710761640457, "grad_norm": 4.056955337524414, "learning_rate": 3.851780401349557e-06, "loss": 1.0348, "step": 7609 }, { "epoch": 0.5866481652790626, "grad_norm": 3.392282485961914, "learning_rate": 3.8505653550164255e-06, "loss": 0.8725, "step": 7610 }, { "epoch": 0.5867252543940795, "grad_norm": 3.6491785049438477, "learning_rate": 3.849350380344591e-06, "loss": 0.8837, "step": 7611 }, { "epoch": 0.5868023435090965, "grad_norm": 3.8450396060943604, "learning_rate": 3.8481354774098025e-06, "loss": 0.9088, "step": 7612 }, { "epoch": 0.5868794326241135, "grad_norm": 3.7339627742767334, "learning_rate": 3.8469206462878e-06, "loss": 0.9026, "step": 7613 }, { "epoch": 0.5869565217391305, "grad_norm": 
3.736210346221924, "learning_rate": 3.845705887054324e-06, "loss": 0.8872, "step": 7614 }, { "epoch": 0.5870336108541474, "grad_norm": 3.851383924484253, "learning_rate": 3.844491199785107e-06, "loss": 0.9875, "step": 7615 }, { "epoch": 0.5871106999691643, "grad_norm": 3.5837175846099854, "learning_rate": 3.843276584555879e-06, "loss": 0.9964, "step": 7616 }, { "epoch": 0.5871877890841813, "grad_norm": 3.548447847366333, "learning_rate": 3.842062041442366e-06, "loss": 0.9743, "step": 7617 }, { "epoch": 0.5872648781991983, "grad_norm": 3.630624532699585, "learning_rate": 3.8408475705202876e-06, "loss": 0.9896, "step": 7618 }, { "epoch": 0.5873419673142153, "grad_norm": 3.6596896648406982, "learning_rate": 3.839633171865359e-06, "loss": 0.8792, "step": 7619 }, { "epoch": 0.5874190564292322, "grad_norm": 3.846327066421509, "learning_rate": 3.838418845553293e-06, "loss": 0.9892, "step": 7620 }, { "epoch": 0.5874961455442491, "grad_norm": 3.916472911834717, "learning_rate": 3.837204591659795e-06, "loss": 1.0246, "step": 7621 }, { "epoch": 0.5875732346592661, "grad_norm": 3.52048397064209, "learning_rate": 3.83599041026057e-06, "loss": 0.7953, "step": 7622 }, { "epoch": 0.5876503237742831, "grad_norm": 3.525165557861328, "learning_rate": 3.834776301431314e-06, "loss": 0.8311, "step": 7623 }, { "epoch": 0.5877274128893001, "grad_norm": 3.754812240600586, "learning_rate": 3.83356226524772e-06, "loss": 0.9588, "step": 7624 }, { "epoch": 0.587804502004317, "grad_norm": 3.5467734336853027, "learning_rate": 3.832348301785479e-06, "loss": 0.9716, "step": 7625 }, { "epoch": 0.5878815911193339, "grad_norm": 3.7867307662963867, "learning_rate": 3.831134411120273e-06, "loss": 1.0345, "step": 7626 }, { "epoch": 0.5879586802343509, "grad_norm": 3.4668374061584473, "learning_rate": 3.8299205933277814e-06, "loss": 0.8672, "step": 7627 }, { "epoch": 0.5880357693493679, "grad_norm": 3.5994906425476074, "learning_rate": 3.8287068484836835e-06, "loss": 0.8797, "step": 7628 }, { "epoch": 0.5881128584643849, "grad_norm": 3.47133731842041, "learning_rate": 3.827493176663645e-06, "loss": 0.8707, "step": 7629 }, { "epoch": 0.5881899475794018, "grad_norm": 3.8516485691070557, "learning_rate": 3.826279577943335e-06, "loss": 0.9896, "step": 7630 }, { "epoch": 0.5882670366944187, "grad_norm": 3.4910974502563477, "learning_rate": 3.825066052398416e-06, "loss": 0.9694, "step": 7631 }, { "epoch": 0.5883441258094357, "grad_norm": 3.6999757289886475, "learning_rate": 3.823852600104542e-06, "loss": 0.8824, "step": 7632 }, { "epoch": 0.5884212149244527, "grad_norm": 3.632878541946411, "learning_rate": 3.822639221137368e-06, "loss": 0.9755, "step": 7633 }, { "epoch": 0.5884983040394697, "grad_norm": 3.42018461227417, "learning_rate": 3.821425915572537e-06, "loss": 0.902, "step": 7634 }, { "epoch": 0.5885753931544866, "grad_norm": 3.5439507961273193, "learning_rate": 3.8202126834857e-06, "loss": 0.8284, "step": 7635 }, { "epoch": 0.5886524822695035, "grad_norm": 3.7671468257904053, "learning_rate": 3.818999524952491e-06, "loss": 0.9682, "step": 7636 }, { "epoch": 0.5887295713845205, "grad_norm": 3.287444829940796, "learning_rate": 3.817786440048545e-06, "loss": 0.9903, "step": 7637 }, { "epoch": 0.5888066604995374, "grad_norm": 3.516890525817871, "learning_rate": 3.8165734288494925e-06, "loss": 0.9282, "step": 7638 }, { "epoch": 0.5888837496145545, "grad_norm": 3.4296483993530273, "learning_rate": 3.815360491430956e-06, "loss": 0.8815, "step": 7639 }, { "epoch": 0.5889608387295714, "grad_norm": 3.8724493980407715, "learning_rate": 
3.8141476278685596e-06, "loss": 0.8815, "step": 7640 }, { "epoch": 0.5890379278445883, "grad_norm": 3.6292412281036377, "learning_rate": 3.812934838237915e-06, "loss": 0.9961, "step": 7641 }, { "epoch": 0.5891150169596053, "grad_norm": 3.47171688079834, "learning_rate": 3.811722122614636e-06, "loss": 0.9523, "step": 7642 }, { "epoch": 0.5891921060746222, "grad_norm": 3.357323408126831, "learning_rate": 3.8105094810743286e-06, "loss": 0.959, "step": 7643 }, { "epoch": 0.5892691951896393, "grad_norm": 3.610004425048828, "learning_rate": 3.809296913692594e-06, "loss": 0.9088, "step": 7644 }, { "epoch": 0.5893462843046562, "grad_norm": 3.6334939002990723, "learning_rate": 3.80808442054503e-06, "loss": 0.9915, "step": 7645 }, { "epoch": 0.5894233734196731, "grad_norm": 3.537346601486206, "learning_rate": 3.80687200170723e-06, "loss": 0.8867, "step": 7646 }, { "epoch": 0.5895004625346901, "grad_norm": 3.5367846488952637, "learning_rate": 3.805659657254781e-06, "loss": 0.9774, "step": 7647 }, { "epoch": 0.589577551649707, "grad_norm": 3.7989237308502197, "learning_rate": 3.8044473872632663e-06, "loss": 0.959, "step": 7648 }, { "epoch": 0.5896546407647241, "grad_norm": 3.7498884201049805, "learning_rate": 3.8032351918082665e-06, "loss": 0.9014, "step": 7649 }, { "epoch": 0.589731729879741, "grad_norm": 3.9480056762695312, "learning_rate": 3.8020230709653527e-06, "loss": 0.9617, "step": 7650 }, { "epoch": 0.5898088189947579, "grad_norm": 3.7743465900421143, "learning_rate": 3.800811024810097e-06, "loss": 1.0255, "step": 7651 }, { "epoch": 0.5898859081097749, "grad_norm": 3.489729166030884, "learning_rate": 3.7995990534180627e-06, "loss": 0.929, "step": 7652 }, { "epoch": 0.5899629972247918, "grad_norm": 4.14895486831665, "learning_rate": 3.7983871568648095e-06, "loss": 1.0033, "step": 7653 }, { "epoch": 0.5900400863398089, "grad_norm": 3.2822515964508057, "learning_rate": 3.7971753352258955e-06, "loss": 0.8825, "step": 7654 }, { "epoch": 0.5901171754548258, "grad_norm": 3.469475507736206, "learning_rate": 3.7959635885768686e-06, "loss": 0.8867, "step": 7655 }, { "epoch": 0.5901942645698427, "grad_norm": 3.7009308338165283, "learning_rate": 3.7947519169932754e-06, "loss": 0.9416, "step": 7656 }, { "epoch": 0.5902713536848597, "grad_norm": 3.662630319595337, "learning_rate": 3.79354032055066e-06, "loss": 0.9328, "step": 7657 }, { "epoch": 0.5903484427998766, "grad_norm": 3.5414748191833496, "learning_rate": 3.7923287993245556e-06, "loss": 0.8743, "step": 7658 }, { "epoch": 0.5904255319148937, "grad_norm": 3.310471773147583, "learning_rate": 3.791117353390496e-06, "loss": 0.7666, "step": 7659 }, { "epoch": 0.5905026210299106, "grad_norm": 3.43023943901062, "learning_rate": 3.78990598282401e-06, "loss": 0.8936, "step": 7660 }, { "epoch": 0.5905797101449275, "grad_norm": 3.8442037105560303, "learning_rate": 3.788694687700617e-06, "loss": 1.0593, "step": 7661 }, { "epoch": 0.5906567992599445, "grad_norm": 3.430696487426758, "learning_rate": 3.787483468095838e-06, "loss": 0.8401, "step": 7662 }, { "epoch": 0.5907338883749614, "grad_norm": 3.727947950363159, "learning_rate": 3.786272324085184e-06, "loss": 0.9685, "step": 7663 }, { "epoch": 0.5908109774899785, "grad_norm": 3.676292657852173, "learning_rate": 3.7850612557441648e-06, "loss": 1.0102, "step": 7664 }, { "epoch": 0.5908880666049954, "grad_norm": 3.6820626258850098, "learning_rate": 3.783850263148284e-06, "loss": 0.9545, "step": 7665 }, { "epoch": 0.5909651557200123, "grad_norm": 3.582075357437134, "learning_rate": 3.7826393463730403e-06, "loss": 
0.8736, "step": 7666 }, { "epoch": 0.5910422448350293, "grad_norm": 3.6667416095733643, "learning_rate": 3.7814285054939285e-06, "loss": 0.9364, "step": 7667 }, { "epoch": 0.5911193339500462, "grad_norm": 3.8910152912139893, "learning_rate": 3.7802177405864392e-06, "loss": 1.0272, "step": 7668 }, { "epoch": 0.5911964230650633, "grad_norm": 3.327561378479004, "learning_rate": 3.779007051726056e-06, "loss": 0.7072, "step": 7669 }, { "epoch": 0.5912735121800802, "grad_norm": 3.4671108722686768, "learning_rate": 3.77779643898826e-06, "loss": 0.8911, "step": 7670 }, { "epoch": 0.5913506012950971, "grad_norm": 3.4955384731292725, "learning_rate": 3.7765859024485246e-06, "loss": 0.7711, "step": 7671 }, { "epoch": 0.5914276904101141, "grad_norm": 3.6591098308563232, "learning_rate": 3.7753754421823225e-06, "loss": 0.9811, "step": 7672 }, { "epoch": 0.591504779525131, "grad_norm": 3.4825854301452637, "learning_rate": 3.7741650582651195e-06, "loss": 0.8656, "step": 7673 }, { "epoch": 0.5915818686401481, "grad_norm": 3.4312667846679688, "learning_rate": 3.7729547507723764e-06, "loss": 0.8741, "step": 7674 }, { "epoch": 0.591658957755165, "grad_norm": 3.5050833225250244, "learning_rate": 3.771744519779548e-06, "loss": 0.8984, "step": 7675 }, { "epoch": 0.5917360468701819, "grad_norm": 3.687978982925415, "learning_rate": 3.7705343653620894e-06, "loss": 0.8962, "step": 7676 }, { "epoch": 0.5918131359851989, "grad_norm": 3.417243480682373, "learning_rate": 3.769324287595445e-06, "loss": 0.922, "step": 7677 }, { "epoch": 0.5918902251002158, "grad_norm": 3.6329598426818848, "learning_rate": 3.7681142865550555e-06, "loss": 0.9152, "step": 7678 }, { "epoch": 0.5919673142152329, "grad_norm": 3.985581398010254, "learning_rate": 3.766904362316362e-06, "loss": 0.9494, "step": 7679 }, { "epoch": 0.5920444033302498, "grad_norm": 3.8809006214141846, "learning_rate": 3.765694514954796e-06, "loss": 0.9822, "step": 7680 }, { "epoch": 0.5921214924452667, "grad_norm": 3.394606828689575, "learning_rate": 3.7644847445457826e-06, "loss": 0.8432, "step": 7681 }, { "epoch": 0.5921985815602837, "grad_norm": 3.865122079849243, "learning_rate": 3.7632750511647478e-06, "loss": 0.9003, "step": 7682 }, { "epoch": 0.5922756706753006, "grad_norm": 3.693389892578125, "learning_rate": 3.7620654348871083e-06, "loss": 0.9377, "step": 7683 }, { "epoch": 0.5923527597903177, "grad_norm": 3.6535701751708984, "learning_rate": 3.760855895788277e-06, "loss": 0.9298, "step": 7684 }, { "epoch": 0.5924298489053346, "grad_norm": 3.484705686569214, "learning_rate": 3.759646433943662e-06, "loss": 0.9926, "step": 7685 }, { "epoch": 0.5925069380203515, "grad_norm": 3.9103033542633057, "learning_rate": 3.7584370494286697e-06, "loss": 1.0298, "step": 7686 }, { "epoch": 0.5925840271353685, "grad_norm": 3.3555185794830322, "learning_rate": 3.7572277423186964e-06, "loss": 0.8897, "step": 7687 }, { "epoch": 0.5926611162503854, "grad_norm": 3.7804808616638184, "learning_rate": 3.7560185126891375e-06, "loss": 0.9413, "step": 7688 }, { "epoch": 0.5927382053654024, "grad_norm": 3.7043962478637695, "learning_rate": 3.75480936061538e-06, "loss": 0.8945, "step": 7689 }, { "epoch": 0.5928152944804194, "grad_norm": 3.398715019226074, "learning_rate": 3.753600286172811e-06, "loss": 0.8635, "step": 7690 }, { "epoch": 0.5928923835954363, "grad_norm": 3.560133934020996, "learning_rate": 3.7523912894368093e-06, "loss": 0.9516, "step": 7691 }, { "epoch": 0.5929694727104533, "grad_norm": 3.709573745727539, "learning_rate": 3.751182370482748e-06, "loss": 0.9026, "step": 
7692 }, { "epoch": 0.5930465618254702, "grad_norm": 3.8817639350891113, "learning_rate": 3.7499735293859985e-06, "loss": 0.9197, "step": 7693 }, { "epoch": 0.5931236509404872, "grad_norm": 3.9092764854431152, "learning_rate": 3.7487647662219263e-06, "loss": 0.8946, "step": 7694 }, { "epoch": 0.5932007400555042, "grad_norm": 3.4209654331207275, "learning_rate": 3.7475560810658896e-06, "loss": 0.8795, "step": 7695 }, { "epoch": 0.5932778291705211, "grad_norm": 3.7244107723236084, "learning_rate": 3.746347473993245e-06, "loss": 0.9669, "step": 7696 }, { "epoch": 0.5933549182855381, "grad_norm": 3.8219058513641357, "learning_rate": 3.745138945079343e-06, "loss": 0.9342, "step": 7697 }, { "epoch": 0.593432007400555, "grad_norm": 3.733274459838867, "learning_rate": 3.7439304943995274e-06, "loss": 1.05, "step": 7698 }, { "epoch": 0.593509096515572, "grad_norm": 3.3344218730926514, "learning_rate": 3.742722122029142e-06, "loss": 0.8988, "step": 7699 }, { "epoch": 0.593586185630589, "grad_norm": 3.70858097076416, "learning_rate": 3.741513828043519e-06, "loss": 0.9501, "step": 7700 }, { "epoch": 0.5936632747456059, "grad_norm": 3.8456056118011475, "learning_rate": 3.7403056125179916e-06, "loss": 0.9357, "step": 7701 }, { "epoch": 0.5937403638606229, "grad_norm": 3.785587787628174, "learning_rate": 3.739097475527885e-06, "loss": 0.9546, "step": 7702 }, { "epoch": 0.5938174529756398, "grad_norm": 3.5768582820892334, "learning_rate": 3.7378894171485203e-06, "loss": 0.8708, "step": 7703 }, { "epoch": 0.5938945420906568, "grad_norm": 3.9242618083953857, "learning_rate": 3.736681437455214e-06, "loss": 1.0719, "step": 7704 }, { "epoch": 0.5939716312056738, "grad_norm": 3.8821098804473877, "learning_rate": 3.7354735365232777e-06, "loss": 0.914, "step": 7705 }, { "epoch": 0.5940487203206907, "grad_norm": 3.5671024322509766, "learning_rate": 3.7342657144280162e-06, "loss": 1.0382, "step": 7706 }, { "epoch": 0.5941258094357077, "grad_norm": 4.059629917144775, "learning_rate": 3.7330579712447324e-06, "loss": 0.9346, "step": 7707 }, { "epoch": 0.5942028985507246, "grad_norm": 3.79659366607666, "learning_rate": 3.7318503070487235e-06, "loss": 0.9125, "step": 7708 }, { "epoch": 0.5942799876657416, "grad_norm": 3.4974241256713867, "learning_rate": 3.7306427219152786e-06, "loss": 0.9265, "step": 7709 }, { "epoch": 0.5943570767807586, "grad_norm": 3.5367648601531982, "learning_rate": 3.7294352159196865e-06, "loss": 0.9146, "step": 7710 }, { "epoch": 0.5944341658957755, "grad_norm": 3.3127148151397705, "learning_rate": 3.7282277891372287e-06, "loss": 0.9294, "step": 7711 }, { "epoch": 0.5945112550107925, "grad_norm": 3.6922547817230225, "learning_rate": 3.72702044164318e-06, "loss": 0.9764, "step": 7712 }, { "epoch": 0.5945883441258094, "grad_norm": 3.6928598880767822, "learning_rate": 3.7258131735128156e-06, "loss": 1.0221, "step": 7713 }, { "epoch": 0.5946654332408264, "grad_norm": 3.2286767959594727, "learning_rate": 3.7246059848213996e-06, "loss": 0.736, "step": 7714 }, { "epoch": 0.5947425223558434, "grad_norm": 3.4887712001800537, "learning_rate": 3.7233988756441953e-06, "loss": 0.9037, "step": 7715 }, { "epoch": 0.5948196114708603, "grad_norm": 4.033304691314697, "learning_rate": 3.72219184605646e-06, "loss": 0.956, "step": 7716 }, { "epoch": 0.5948967005858773, "grad_norm": 3.5219640731811523, "learning_rate": 3.720984896133444e-06, "loss": 0.9298, "step": 7717 }, { "epoch": 0.5949737897008942, "grad_norm": 3.6129660606384277, "learning_rate": 3.719778025950397e-06, "loss": 0.9154, "step": 7718 }, { "epoch": 
0.5950508788159112, "grad_norm": 3.6359000205993652, "learning_rate": 3.7185712355825577e-06, "loss": 0.9319, "step": 7719 }, { "epoch": 0.5951279679309281, "grad_norm": 3.6915714740753174, "learning_rate": 3.717364525105166e-06, "loss": 0.899, "step": 7720 }, { "epoch": 0.5952050570459451, "grad_norm": 3.943981409072876, "learning_rate": 3.716157894593454e-06, "loss": 1.0859, "step": 7721 }, { "epoch": 0.5952821461609621, "grad_norm": 3.356869697570801, "learning_rate": 3.714951344122647e-06, "loss": 0.8368, "step": 7722 }, { "epoch": 0.595359235275979, "grad_norm": 3.6155312061309814, "learning_rate": 3.7137448737679677e-06, "loss": 0.8818, "step": 7723 }, { "epoch": 0.595436324390996, "grad_norm": 3.962568759918213, "learning_rate": 3.712538483604634e-06, "loss": 0.93, "step": 7724 }, { "epoch": 0.595513413506013, "grad_norm": 3.563605785369873, "learning_rate": 3.7113321737078587e-06, "loss": 0.872, "step": 7725 }, { "epoch": 0.5955905026210299, "grad_norm": 3.954345226287842, "learning_rate": 3.710125944152849e-06, "loss": 0.9366, "step": 7726 }, { "epoch": 0.5956675917360469, "grad_norm": 3.7304627895355225, "learning_rate": 3.7089197950148054e-06, "loss": 0.9616, "step": 7727 }, { "epoch": 0.5957446808510638, "grad_norm": 3.6339216232299805, "learning_rate": 3.7077137263689266e-06, "loss": 0.8768, "step": 7728 }, { "epoch": 0.5958217699660808, "grad_norm": 3.9956276416778564, "learning_rate": 3.706507738290403e-06, "loss": 1.0666, "step": 7729 }, { "epoch": 0.5958988590810977, "grad_norm": 3.52579402923584, "learning_rate": 3.705301830854423e-06, "loss": 0.9173, "step": 7730 }, { "epoch": 0.5959759481961147, "grad_norm": 3.9058704376220703, "learning_rate": 3.7040960041361696e-06, "loss": 1.0185, "step": 7731 }, { "epoch": 0.5960530373111317, "grad_norm": 4.03030252456665, "learning_rate": 3.7028902582108174e-06, "loss": 1.025, "step": 7732 }, { "epoch": 0.5961301264261486, "grad_norm": 4.030737400054932, "learning_rate": 3.7016845931535395e-06, "loss": 0.9925, "step": 7733 }, { "epoch": 0.5962072155411656, "grad_norm": 3.7005774974823, "learning_rate": 3.7004790090395043e-06, "loss": 0.9436, "step": 7734 }, { "epoch": 0.5962843046561825, "grad_norm": 3.9587509632110596, "learning_rate": 3.699273505943871e-06, "loss": 1.0326, "step": 7735 }, { "epoch": 0.5963613937711995, "grad_norm": 3.745950937271118, "learning_rate": 3.6980680839418e-06, "loss": 0.8556, "step": 7736 }, { "epoch": 0.5964384828862165, "grad_norm": 3.4118099212646484, "learning_rate": 3.696862743108439e-06, "loss": 0.9266, "step": 7737 }, { "epoch": 0.5965155720012334, "grad_norm": 3.6588337421417236, "learning_rate": 3.6956574835189374e-06, "loss": 0.9452, "step": 7738 }, { "epoch": 0.5965926611162504, "grad_norm": 3.32086443901062, "learning_rate": 3.694452305248437e-06, "loss": 0.9146, "step": 7739 }, { "epoch": 0.5966697502312673, "grad_norm": 3.491138458251953, "learning_rate": 3.693247208372074e-06, "loss": 0.7812, "step": 7740 }, { "epoch": 0.5967468393462843, "grad_norm": 3.2790908813476562, "learning_rate": 3.6920421929649786e-06, "loss": 0.7925, "step": 7741 }, { "epoch": 0.5968239284613013, "grad_norm": 3.902376174926758, "learning_rate": 3.690837259102279e-06, "loss": 0.991, "step": 7742 }, { "epoch": 0.5969010175763182, "grad_norm": 3.6180334091186523, "learning_rate": 3.689632406859096e-06, "loss": 0.8429, "step": 7743 }, { "epoch": 0.5969781066913352, "grad_norm": 4.053879737854004, "learning_rate": 3.688427636310545e-06, "loss": 1.1251, "step": 7744 }, { "epoch": 0.5970551958063521, "grad_norm": 
4.163644790649414, "learning_rate": 3.68722294753174e-06, "loss": 0.9068, "step": 7745 }, { "epoch": 0.597132284921369, "grad_norm": 3.3807566165924072, "learning_rate": 3.6860183405977833e-06, "loss": 0.8411, "step": 7746 }, { "epoch": 0.5972093740363861, "grad_norm": 3.3274459838867188, "learning_rate": 3.6848138155837786e-06, "loss": 0.7565, "step": 7747 }, { "epoch": 0.597286463151403, "grad_norm": 3.35614013671875, "learning_rate": 3.683609372564821e-06, "loss": 0.7392, "step": 7748 }, { "epoch": 0.59736355226642, "grad_norm": 4.015716075897217, "learning_rate": 3.6824050116160002e-06, "loss": 1.0421, "step": 7749 }, { "epoch": 0.5974406413814369, "grad_norm": 3.730281114578247, "learning_rate": 3.681200732812405e-06, "loss": 1.0248, "step": 7750 }, { "epoch": 0.5975177304964538, "grad_norm": 3.4438388347625732, "learning_rate": 3.6799965362291123e-06, "loss": 0.8048, "step": 7751 }, { "epoch": 0.5975948196114709, "grad_norm": 3.696259021759033, "learning_rate": 3.678792421941199e-06, "loss": 0.9177, "step": 7752 }, { "epoch": 0.5976719087264878, "grad_norm": 3.7113966941833496, "learning_rate": 3.677588390023736e-06, "loss": 0.9895, "step": 7753 }, { "epoch": 0.5977489978415048, "grad_norm": 3.3123090267181396, "learning_rate": 3.6763844405517877e-06, "loss": 0.8952, "step": 7754 }, { "epoch": 0.5978260869565217, "grad_norm": 3.5116400718688965, "learning_rate": 3.6751805736004148e-06, "loss": 0.9271, "step": 7755 }, { "epoch": 0.5979031760715386, "grad_norm": 3.612499237060547, "learning_rate": 3.673976789244672e-06, "loss": 0.8571, "step": 7756 }, { "epoch": 0.5979802651865557, "grad_norm": 3.808100938796997, "learning_rate": 3.672773087559608e-06, "loss": 0.9372, "step": 7757 }, { "epoch": 0.5980573543015726, "grad_norm": 3.6125247478485107, "learning_rate": 3.6715694686202686e-06, "loss": 0.93, "step": 7758 }, { "epoch": 0.5981344434165896, "grad_norm": 3.831362724304199, "learning_rate": 3.6703659325016927e-06, "loss": 0.8422, "step": 7759 }, { "epoch": 0.5982115325316065, "grad_norm": 3.4651618003845215, "learning_rate": 3.669162479278914e-06, "loss": 0.9065, "step": 7760 }, { "epoch": 0.5982886216466234, "grad_norm": 3.6976826190948486, "learning_rate": 3.667959109026963e-06, "loss": 0.8673, "step": 7761 }, { "epoch": 0.5983657107616405, "grad_norm": 3.865349292755127, "learning_rate": 3.6667558218208617e-06, "loss": 0.988, "step": 7762 }, { "epoch": 0.5984427998766574, "grad_norm": 3.4592785835266113, "learning_rate": 3.665552617735629e-06, "loss": 0.9485, "step": 7763 }, { "epoch": 0.5985198889916744, "grad_norm": 3.4235899448394775, "learning_rate": 3.6643494968462822e-06, "loss": 0.867, "step": 7764 }, { "epoch": 0.5985969781066913, "grad_norm": 3.647528648376465, "learning_rate": 3.663146459227824e-06, "loss": 0.851, "step": 7765 }, { "epoch": 0.5986740672217082, "grad_norm": 3.5159714221954346, "learning_rate": 3.6619435049552633e-06, "loss": 0.908, "step": 7766 }, { "epoch": 0.5987511563367253, "grad_norm": 3.558000326156616, "learning_rate": 3.66074063410359e-06, "loss": 0.9187, "step": 7767 }, { "epoch": 0.5988282454517422, "grad_norm": 3.671823263168335, "learning_rate": 3.659537846747806e-06, "loss": 0.9016, "step": 7768 }, { "epoch": 0.5989053345667592, "grad_norm": 3.4551210403442383, "learning_rate": 3.658335142962894e-06, "loss": 0.8888, "step": 7769 }, { "epoch": 0.5989824236817761, "grad_norm": 3.604067087173462, "learning_rate": 3.657132522823837e-06, "loss": 0.9862, "step": 7770 }, { "epoch": 0.5990595127967931, "grad_norm": 3.8700289726257324, 
"learning_rate": 3.655929986405613e-06, "loss": 0.963, "step": 7771 }, { "epoch": 0.5991366019118101, "grad_norm": 3.566358804702759, "learning_rate": 3.6547275337831922e-06, "loss": 0.8178, "step": 7772 }, { "epoch": 0.599213691026827, "grad_norm": 3.576695203781128, "learning_rate": 3.653525165031543e-06, "loss": 0.9241, "step": 7773 }, { "epoch": 0.599290780141844, "grad_norm": 3.5884556770324707, "learning_rate": 3.6523228802256264e-06, "loss": 0.8936, "step": 7774 }, { "epoch": 0.5993678692568609, "grad_norm": 3.7305357456207275, "learning_rate": 3.651120679440398e-06, "loss": 0.8829, "step": 7775 }, { "epoch": 0.599444958371878, "grad_norm": 3.6430912017822266, "learning_rate": 3.6499185627508098e-06, "loss": 0.9241, "step": 7776 }, { "epoch": 0.5995220474868949, "grad_norm": 3.6668894290924072, "learning_rate": 3.648716530231806e-06, "loss": 0.8658, "step": 7777 }, { "epoch": 0.5995991366019118, "grad_norm": 3.5166971683502197, "learning_rate": 3.6475145819583285e-06, "loss": 0.9614, "step": 7778 }, { "epoch": 0.5996762257169288, "grad_norm": 3.9668848514556885, "learning_rate": 3.6463127180053125e-06, "loss": 1.0337, "step": 7779 }, { "epoch": 0.5997533148319457, "grad_norm": 3.5384409427642822, "learning_rate": 3.6451109384476875e-06, "loss": 0.9416, "step": 7780 }, { "epoch": 0.5998304039469627, "grad_norm": 3.4631733894348145, "learning_rate": 3.6439092433603775e-06, "loss": 0.819, "step": 7781 }, { "epoch": 0.5999074930619797, "grad_norm": 3.906912326812744, "learning_rate": 3.642707632818304e-06, "loss": 0.908, "step": 7782 }, { "epoch": 0.5999845821769966, "grad_norm": 3.743074893951416, "learning_rate": 3.641506106896379e-06, "loss": 0.9959, "step": 7783 }, { "epoch": 0.6000616712920136, "grad_norm": 3.657150983810425, "learning_rate": 3.640304665669514e-06, "loss": 0.9171, "step": 7784 }, { "epoch": 0.6001387604070305, "grad_norm": 3.718385934829712, "learning_rate": 3.639103309212609e-06, "loss": 0.8975, "step": 7785 }, { "epoch": 0.6002158495220475, "grad_norm": 3.5247554779052734, "learning_rate": 3.6379020376005646e-06, "loss": 0.8734, "step": 7786 }, { "epoch": 0.6002929386370645, "grad_norm": 3.818239688873291, "learning_rate": 3.6367008509082757e-06, "loss": 0.9179, "step": 7787 }, { "epoch": 0.6003700277520814, "grad_norm": 4.0803728103637695, "learning_rate": 3.6354997492106258e-06, "loss": 0.9927, "step": 7788 }, { "epoch": 0.6004471168670984, "grad_norm": 3.478654384613037, "learning_rate": 3.634298732582501e-06, "loss": 0.9289, "step": 7789 }, { "epoch": 0.6005242059821153, "grad_norm": 3.6446428298950195, "learning_rate": 3.6330978010987767e-06, "loss": 0.9966, "step": 7790 }, { "epoch": 0.6006012950971323, "grad_norm": 3.805497407913208, "learning_rate": 3.6318969548343246e-06, "loss": 1.0771, "step": 7791 }, { "epoch": 0.6006783842121493, "grad_norm": 3.4235615730285645, "learning_rate": 3.6306961938640116e-06, "loss": 0.9165, "step": 7792 }, { "epoch": 0.6007554733271662, "grad_norm": 3.7066256999969482, "learning_rate": 3.6294955182627007e-06, "loss": 0.8412, "step": 7793 }, { "epoch": 0.6008325624421832, "grad_norm": 3.6571426391601562, "learning_rate": 3.628294928105245e-06, "loss": 0.8972, "step": 7794 }, { "epoch": 0.6009096515572001, "grad_norm": 3.8981006145477295, "learning_rate": 3.6270944234664972e-06, "loss": 0.8919, "step": 7795 }, { "epoch": 0.6009867406722171, "grad_norm": 3.7625627517700195, "learning_rate": 3.6258940044213004e-06, "loss": 0.8221, "step": 7796 }, { "epoch": 0.601063829787234, "grad_norm": 4.007872104644775, "learning_rate": 
3.6246936710444957e-06, "loss": 0.9362, "step": 7797 }, { "epoch": 0.601140918902251, "grad_norm": 3.732297420501709, "learning_rate": 3.623493423410919e-06, "loss": 0.8811, "step": 7798 }, { "epoch": 0.601218008017268, "grad_norm": 3.483485698699951, "learning_rate": 3.6222932615953977e-06, "loss": 0.9207, "step": 7799 }, { "epoch": 0.6012950971322849, "grad_norm": 3.7129147052764893, "learning_rate": 3.6210931856727547e-06, "loss": 0.8513, "step": 7800 }, { "epoch": 0.6013721862473019, "grad_norm": 3.39215087890625, "learning_rate": 3.619893195717813e-06, "loss": 0.8216, "step": 7801 }, { "epoch": 0.6014492753623188, "grad_norm": 3.39719295501709, "learning_rate": 3.6186932918053806e-06, "loss": 1.0014, "step": 7802 }, { "epoch": 0.6015263644773358, "grad_norm": 3.573181629180908, "learning_rate": 3.6174934740102672e-06, "loss": 0.864, "step": 7803 }, { "epoch": 0.6016034535923528, "grad_norm": 3.7137699127197266, "learning_rate": 3.6162937424072775e-06, "loss": 1.0308, "step": 7804 }, { "epoch": 0.6016805427073697, "grad_norm": 3.913328170776367, "learning_rate": 3.6150940970712045e-06, "loss": 0.9982, "step": 7805 }, { "epoch": 0.6017576318223867, "grad_norm": 3.644329071044922, "learning_rate": 3.6138945380768442e-06, "loss": 0.9262, "step": 7806 }, { "epoch": 0.6018347209374036, "grad_norm": 3.936444044113159, "learning_rate": 3.612695065498979e-06, "loss": 0.8839, "step": 7807 }, { "epoch": 0.6019118100524206, "grad_norm": 3.725987434387207, "learning_rate": 3.6114956794123913e-06, "loss": 0.9187, "step": 7808 }, { "epoch": 0.6019888991674376, "grad_norm": 3.515115261077881, "learning_rate": 3.6102963798918576e-06, "loss": 0.9055, "step": 7809 }, { "epoch": 0.6020659882824545, "grad_norm": 3.660930871963501, "learning_rate": 3.6090971670121468e-06, "loss": 0.8665, "step": 7810 }, { "epoch": 0.6021430773974715, "grad_norm": 3.7818009853363037, "learning_rate": 3.6078980408480217e-06, "loss": 1.0293, "step": 7811 }, { "epoch": 0.6022201665124884, "grad_norm": 4.332160949707031, "learning_rate": 3.606699001474246e-06, "loss": 1.1111, "step": 7812 }, { "epoch": 0.6022972556275054, "grad_norm": 3.5895073413848877, "learning_rate": 3.6055000489655725e-06, "loss": 0.8604, "step": 7813 }, { "epoch": 0.6023743447425224, "grad_norm": 3.754880666732788, "learning_rate": 3.6043011833967465e-06, "loss": 0.9298, "step": 7814 }, { "epoch": 0.6024514338575393, "grad_norm": 4.089043140411377, "learning_rate": 3.6031024048425146e-06, "loss": 1.0692, "step": 7815 }, { "epoch": 0.6025285229725563, "grad_norm": 3.798560619354248, "learning_rate": 3.6019037133776136e-06, "loss": 0.918, "step": 7816 }, { "epoch": 0.6026056120875732, "grad_norm": 3.821998357772827, "learning_rate": 3.6007051090767737e-06, "loss": 0.9856, "step": 7817 }, { "epoch": 0.6026827012025902, "grad_norm": 3.517061948776245, "learning_rate": 3.5995065920147233e-06, "loss": 0.8683, "step": 7818 }, { "epoch": 0.6027597903176072, "grad_norm": 3.6391801834106445, "learning_rate": 3.598308162266185e-06, "loss": 0.8445, "step": 7819 }, { "epoch": 0.6028368794326241, "grad_norm": 3.6682894229888916, "learning_rate": 3.5971098199058716e-06, "loss": 0.9821, "step": 7820 }, { "epoch": 0.6029139685476411, "grad_norm": 4.120725154876709, "learning_rate": 3.5959115650084954e-06, "loss": 0.8732, "step": 7821 }, { "epoch": 0.602991057662658, "grad_norm": 3.8292622566223145, "learning_rate": 3.5947133976487623e-06, "loss": 0.986, "step": 7822 }, { "epoch": 0.603068146777675, "grad_norm": 3.57987904548645, "learning_rate": 3.5935153179013692e-06, 
"loss": 0.8983, "step": 7823 }, { "epoch": 0.603145235892692, "grad_norm": 3.4487192630767822, "learning_rate": 3.592317325841014e-06, "loss": 0.8779, "step": 7824 }, { "epoch": 0.6032223250077089, "grad_norm": 3.598322629928589, "learning_rate": 3.5911194215423805e-06, "loss": 0.9429, "step": 7825 }, { "epoch": 0.6032994141227259, "grad_norm": 3.9131698608398438, "learning_rate": 3.5899216050801548e-06, "loss": 1.0573, "step": 7826 }, { "epoch": 0.6033765032377428, "grad_norm": 3.5165915489196777, "learning_rate": 3.5887238765290155e-06, "loss": 0.8065, "step": 7827 }, { "epoch": 0.6034535923527597, "grad_norm": 3.4802615642547607, "learning_rate": 3.5875262359636316e-06, "loss": 0.9431, "step": 7828 }, { "epoch": 0.6035306814677768, "grad_norm": 4.010217666625977, "learning_rate": 3.586328683458672e-06, "loss": 0.9236, "step": 7829 }, { "epoch": 0.6036077705827937, "grad_norm": 3.4742398262023926, "learning_rate": 3.5851312190887975e-06, "loss": 0.9148, "step": 7830 }, { "epoch": 0.6036848596978107, "grad_norm": 3.8777101039886475, "learning_rate": 3.583933842928664e-06, "loss": 0.9534, "step": 7831 }, { "epoch": 0.6037619488128276, "grad_norm": 3.97666335105896, "learning_rate": 3.5827365550529215e-06, "loss": 1.0859, "step": 7832 }, { "epoch": 0.6038390379278445, "grad_norm": 3.1657111644744873, "learning_rate": 3.581539355536214e-06, "loss": 0.9061, "step": 7833 }, { "epoch": 0.6039161270428616, "grad_norm": 3.550740957260132, "learning_rate": 3.580342244453181e-06, "loss": 0.9728, "step": 7834 }, { "epoch": 0.6039932161578785, "grad_norm": 3.700817108154297, "learning_rate": 3.5791452218784572e-06, "loss": 0.9626, "step": 7835 }, { "epoch": 0.6040703052728955, "grad_norm": 3.693549633026123, "learning_rate": 3.57794828788667e-06, "loss": 1.0247, "step": 7836 }, { "epoch": 0.6041473943879124, "grad_norm": 3.4788200855255127, "learning_rate": 3.5767514425524413e-06, "loss": 0.9126, "step": 7837 }, { "epoch": 0.6042244835029293, "grad_norm": 3.526183843612671, "learning_rate": 3.57555468595039e-06, "loss": 1.0105, "step": 7838 }, { "epoch": 0.6043015726179464, "grad_norm": 3.612081289291382, "learning_rate": 3.5743580181551265e-06, "loss": 0.9198, "step": 7839 }, { "epoch": 0.6043786617329633, "grad_norm": 3.6444640159606934, "learning_rate": 3.5731614392412557e-06, "loss": 0.9105, "step": 7840 }, { "epoch": 0.6044557508479803, "grad_norm": 3.4978742599487305, "learning_rate": 3.571964949283381e-06, "loss": 0.8368, "step": 7841 }, { "epoch": 0.6045328399629972, "grad_norm": 3.5534303188323975, "learning_rate": 3.5707685483560948e-06, "loss": 0.9524, "step": 7842 }, { "epoch": 0.6046099290780141, "grad_norm": 3.6037681102752686, "learning_rate": 3.569572236533988e-06, "loss": 0.9408, "step": 7843 }, { "epoch": 0.6046870181930312, "grad_norm": 3.651366949081421, "learning_rate": 3.5683760138916433e-06, "loss": 0.8935, "step": 7844 }, { "epoch": 0.6047641073080481, "grad_norm": 3.578256368637085, "learning_rate": 3.5671798805036396e-06, "loss": 0.9498, "step": 7845 }, { "epoch": 0.6048411964230651, "grad_norm": 4.041118621826172, "learning_rate": 3.5659838364445505e-06, "loss": 0.9394, "step": 7846 }, { "epoch": 0.604918285538082, "grad_norm": 3.86403489112854, "learning_rate": 3.564787881788941e-06, "loss": 0.9925, "step": 7847 }, { "epoch": 0.6049953746530989, "grad_norm": 3.811384916305542, "learning_rate": 3.5635920166113735e-06, "loss": 0.9659, "step": 7848 }, { "epoch": 0.605072463768116, "grad_norm": 3.6318352222442627, "learning_rate": 3.5623962409864066e-06, "loss": 0.8375, 
"step": 7849 }, { "epoch": 0.6051495528831329, "grad_norm": 3.6698193550109863, "learning_rate": 3.5612005549885865e-06, "loss": 1.0244, "step": 7850 }, { "epoch": 0.6052266419981499, "grad_norm": 3.5718917846679688, "learning_rate": 3.5600049586924607e-06, "loss": 0.9182, "step": 7851 }, { "epoch": 0.6053037311131668, "grad_norm": 3.9225900173187256, "learning_rate": 3.5588094521725687e-06, "loss": 0.9336, "step": 7852 }, { "epoch": 0.6053808202281837, "grad_norm": 3.1916608810424805, "learning_rate": 3.5576140355034415e-06, "loss": 0.8681, "step": 7853 }, { "epoch": 0.6054579093432008, "grad_norm": 3.703646659851074, "learning_rate": 3.5564187087596116e-06, "loss": 0.9635, "step": 7854 }, { "epoch": 0.6055349984582177, "grad_norm": 3.636467218399048, "learning_rate": 3.5552234720155963e-06, "loss": 0.8925, "step": 7855 }, { "epoch": 0.6056120875732347, "grad_norm": 3.733719825744629, "learning_rate": 3.554028325345914e-06, "loss": 0.9168, "step": 7856 }, { "epoch": 0.6056891766882516, "grad_norm": 3.497403860092163, "learning_rate": 3.552833268825078e-06, "loss": 0.9535, "step": 7857 }, { "epoch": 0.6057662658032685, "grad_norm": 4.8122382164001465, "learning_rate": 3.5516383025275925e-06, "loss": 0.9782, "step": 7858 }, { "epoch": 0.6058433549182856, "grad_norm": 3.9689764976501465, "learning_rate": 3.5504434265279587e-06, "loss": 1.006, "step": 7859 }, { "epoch": 0.6059204440333025, "grad_norm": 3.636197566986084, "learning_rate": 3.5492486409006684e-06, "loss": 0.9264, "step": 7860 }, { "epoch": 0.6059975331483195, "grad_norm": 4.189083099365234, "learning_rate": 3.5480539457202127e-06, "loss": 0.9417, "step": 7861 }, { "epoch": 0.6060746222633364, "grad_norm": 3.744331121444702, "learning_rate": 3.546859341061073e-06, "loss": 0.8417, "step": 7862 }, { "epoch": 0.6061517113783533, "grad_norm": 3.3087058067321777, "learning_rate": 3.545664826997727e-06, "loss": 0.911, "step": 7863 }, { "epoch": 0.6062288004933704, "grad_norm": 3.681582450866699, "learning_rate": 3.5444704036046485e-06, "loss": 0.9636, "step": 7864 }, { "epoch": 0.6063058896083873, "grad_norm": 3.7215073108673096, "learning_rate": 3.543276070956301e-06, "loss": 0.9945, "step": 7865 }, { "epoch": 0.6063829787234043, "grad_norm": 3.6413660049438477, "learning_rate": 3.542081829127145e-06, "loss": 0.8434, "step": 7866 }, { "epoch": 0.6064600678384212, "grad_norm": 3.857893705368042, "learning_rate": 3.540887678191638e-06, "loss": 0.9055, "step": 7867 }, { "epoch": 0.6065371569534381, "grad_norm": 3.426194906234741, "learning_rate": 3.539693618224226e-06, "loss": 0.9556, "step": 7868 }, { "epoch": 0.6066142460684552, "grad_norm": 3.5567467212677, "learning_rate": 3.538499649299354e-06, "loss": 0.9462, "step": 7869 }, { "epoch": 0.6066913351834721, "grad_norm": 3.7717530727386475, "learning_rate": 3.5373057714914607e-06, "loss": 0.9073, "step": 7870 }, { "epoch": 0.6067684242984891, "grad_norm": 4.004831790924072, "learning_rate": 3.5361119848749755e-06, "loss": 0.9405, "step": 7871 }, { "epoch": 0.606845513413506, "grad_norm": 3.8351056575775146, "learning_rate": 3.5349182895243274e-06, "loss": 1.0261, "step": 7872 }, { "epoch": 0.6069226025285229, "grad_norm": 3.8197968006134033, "learning_rate": 3.533724685513935e-06, "loss": 0.9427, "step": 7873 }, { "epoch": 0.60699969164354, "grad_norm": 3.7887625694274902, "learning_rate": 3.5325311729182155e-06, "loss": 1.1297, "step": 7874 }, { "epoch": 0.6070767807585569, "grad_norm": 3.8279473781585693, "learning_rate": 3.5313377518115772e-06, "loss": 0.9428, "step": 7875 }, { 
"epoch": 0.6071538698735739, "grad_norm": 3.926093101501465, "learning_rate": 3.530144422268423e-06, "loss": 0.8763, "step": 7876 }, { "epoch": 0.6072309589885908, "grad_norm": 3.941603183746338, "learning_rate": 3.528951184363151e-06, "loss": 1.0133, "step": 7877 }, { "epoch": 0.6073080481036077, "grad_norm": 3.6375105381011963, "learning_rate": 3.5277580381701553e-06, "loss": 0.9153, "step": 7878 }, { "epoch": 0.6073851372186247, "grad_norm": 3.378063678741455, "learning_rate": 3.52656498376382e-06, "loss": 0.8914, "step": 7879 }, { "epoch": 0.6074622263336417, "grad_norm": 3.7920594215393066, "learning_rate": 3.5253720212185284e-06, "loss": 1.0532, "step": 7880 }, { "epoch": 0.6075393154486587, "grad_norm": 3.6664042472839355, "learning_rate": 3.524179150608652e-06, "loss": 0.7833, "step": 7881 }, { "epoch": 0.6076164045636756, "grad_norm": 3.3724348545074463, "learning_rate": 3.5229863720085623e-06, "loss": 0.9102, "step": 7882 }, { "epoch": 0.6076934936786925, "grad_norm": 3.686537504196167, "learning_rate": 3.521793685492624e-06, "loss": 0.9696, "step": 7883 }, { "epoch": 0.6077705827937095, "grad_norm": 3.747305154800415, "learning_rate": 3.5206010911351924e-06, "loss": 1.0284, "step": 7884 }, { "epoch": 0.6078476719087265, "grad_norm": 3.655407667160034, "learning_rate": 3.5194085890106204e-06, "loss": 0.9406, "step": 7885 }, { "epoch": 0.6079247610237435, "grad_norm": 3.6293342113494873, "learning_rate": 3.518216179193257e-06, "loss": 0.9043, "step": 7886 }, { "epoch": 0.6080018501387604, "grad_norm": 3.54872465133667, "learning_rate": 3.517023861757438e-06, "loss": 1.0235, "step": 7887 }, { "epoch": 0.6080789392537773, "grad_norm": 4.1367106437683105, "learning_rate": 3.5158316367775016e-06, "loss": 0.968, "step": 7888 }, { "epoch": 0.6081560283687943, "grad_norm": 3.3810555934906006, "learning_rate": 3.514639504327776e-06, "loss": 0.8467, "step": 7889 }, { "epoch": 0.6082331174838113, "grad_norm": 3.417717456817627, "learning_rate": 3.5134474644825847e-06, "loss": 0.8455, "step": 7890 }, { "epoch": 0.6083102065988283, "grad_norm": 3.642557144165039, "learning_rate": 3.512255517316245e-06, "loss": 0.9056, "step": 7891 }, { "epoch": 0.6083872957138452, "grad_norm": 3.8719959259033203, "learning_rate": 3.5110636629030674e-06, "loss": 0.9462, "step": 7892 }, { "epoch": 0.6084643848288621, "grad_norm": 3.6329193115234375, "learning_rate": 3.509871901317359e-06, "loss": 0.9395, "step": 7893 }, { "epoch": 0.6085414739438791, "grad_norm": 4.278444290161133, "learning_rate": 3.5086802326334217e-06, "loss": 0.946, "step": 7894 }, { "epoch": 0.6086185630588961, "grad_norm": 3.6533453464508057, "learning_rate": 3.507488656925547e-06, "loss": 0.9943, "step": 7895 }, { "epoch": 0.6086956521739131, "grad_norm": 3.3873438835144043, "learning_rate": 3.5062971742680244e-06, "loss": 0.9275, "step": 7896 }, { "epoch": 0.60877274128893, "grad_norm": 3.647336006164551, "learning_rate": 3.5051057847351377e-06, "loss": 0.8534, "step": 7897 }, { "epoch": 0.6088498304039469, "grad_norm": 3.4682047367095947, "learning_rate": 3.503914488401163e-06, "loss": 0.9414, "step": 7898 }, { "epoch": 0.6089269195189639, "grad_norm": 3.890171527862549, "learning_rate": 3.5027232853403705e-06, "loss": 1.0898, "step": 7899 }, { "epoch": 0.6090040086339809, "grad_norm": 3.491349220275879, "learning_rate": 3.501532175627026e-06, "loss": 0.9413, "step": 7900 }, { "epoch": 0.6090810977489979, "grad_norm": 3.580474376678467, "learning_rate": 3.5003411593353913e-06, "loss": 0.9499, "step": 7901 }, { "epoch": 
0.6091581868640148, "grad_norm": 3.8808116912841797, "learning_rate": 3.4991502365397177e-06, "loss": 0.9255, "step": 7902 }, { "epoch": 0.6092352759790317, "grad_norm": 3.8387744426727295, "learning_rate": 3.4979594073142535e-06, "loss": 0.7857, "step": 7903 }, { "epoch": 0.6093123650940487, "grad_norm": 3.8449535369873047, "learning_rate": 3.4967686717332426e-06, "loss": 0.851, "step": 7904 }, { "epoch": 0.6093894542090657, "grad_norm": 4.096821308135986, "learning_rate": 3.495578029870918e-06, "loss": 0.9903, "step": 7905 }, { "epoch": 0.6094665433240827, "grad_norm": 3.537116050720215, "learning_rate": 3.4943874818015115e-06, "loss": 0.9367, "step": 7906 }, { "epoch": 0.6095436324390996, "grad_norm": 3.9148404598236084, "learning_rate": 3.493197027599249e-06, "loss": 1.0676, "step": 7907 }, { "epoch": 0.6096207215541165, "grad_norm": 3.719594717025757, "learning_rate": 3.492006667338347e-06, "loss": 0.8925, "step": 7908 }, { "epoch": 0.6096978106691335, "grad_norm": 3.5892961025238037, "learning_rate": 3.49081640109302e-06, "loss": 0.9494, "step": 7909 }, { "epoch": 0.6097748997841504, "grad_norm": 3.9413259029388428, "learning_rate": 3.489626228937473e-06, "loss": 0.8696, "step": 7910 }, { "epoch": 0.6098519888991675, "grad_norm": 3.645066499710083, "learning_rate": 3.4884361509459088e-06, "loss": 0.9745, "step": 7911 }, { "epoch": 0.6099290780141844, "grad_norm": 3.4567646980285645, "learning_rate": 3.4872461671925227e-06, "loss": 0.721, "step": 7912 }, { "epoch": 0.6100061671292013, "grad_norm": 3.5239381790161133, "learning_rate": 3.486056277751502e-06, "loss": 0.9016, "step": 7913 }, { "epoch": 0.6100832562442183, "grad_norm": 3.2868807315826416, "learning_rate": 3.484866482697032e-06, "loss": 0.888, "step": 7914 }, { "epoch": 0.6101603453592352, "grad_norm": 3.2013051509857178, "learning_rate": 3.4836767821032902e-06, "loss": 0.8686, "step": 7915 }, { "epoch": 0.6102374344742523, "grad_norm": 3.507582426071167, "learning_rate": 3.4824871760444465e-06, "loss": 1.027, "step": 7916 }, { "epoch": 0.6103145235892692, "grad_norm": 3.7498128414154053, "learning_rate": 3.48129766459467e-06, "loss": 1.0387, "step": 7917 }, { "epoch": 0.6103916127042861, "grad_norm": 3.7105958461761475, "learning_rate": 3.480108247828117e-06, "loss": 0.9652, "step": 7918 }, { "epoch": 0.6104687018193031, "grad_norm": 3.8917605876922607, "learning_rate": 3.478918925818943e-06, "loss": 0.9192, "step": 7919 }, { "epoch": 0.61054579093432, "grad_norm": 3.6685686111450195, "learning_rate": 3.4777296986412972e-06, "loss": 0.9294, "step": 7920 }, { "epoch": 0.6106228800493371, "grad_norm": 3.4759345054626465, "learning_rate": 3.4765405663693196e-06, "loss": 0.8923, "step": 7921 }, { "epoch": 0.610699969164354, "grad_norm": 3.711207151412964, "learning_rate": 3.475351529077148e-06, "loss": 0.9937, "step": 7922 }, { "epoch": 0.6107770582793709, "grad_norm": 3.594850540161133, "learning_rate": 3.474162586838913e-06, "loss": 0.8816, "step": 7923 }, { "epoch": 0.6108541473943879, "grad_norm": 3.8033182621002197, "learning_rate": 3.4729737397287365e-06, "loss": 0.9382, "step": 7924 }, { "epoch": 0.6109312365094048, "grad_norm": 3.380199432373047, "learning_rate": 3.4717849878207387e-06, "loss": 0.8568, "step": 7925 }, { "epoch": 0.6110083256244219, "grad_norm": 3.973435640335083, "learning_rate": 3.4705963311890334e-06, "loss": 0.8793, "step": 7926 }, { "epoch": 0.6110854147394388, "grad_norm": 3.6724209785461426, "learning_rate": 3.469407769907725e-06, "loss": 0.8936, "step": 7927 }, { "epoch": 0.6111625038544557, 
"grad_norm": 3.7241382598876953, "learning_rate": 3.468219304050916e-06, "loss": 0.9815, "step": 7928 }, { "epoch": 0.6112395929694727, "grad_norm": 3.8722872734069824, "learning_rate": 3.4670309336926988e-06, "loss": 1.0309, "step": 7929 }, { "epoch": 0.6113166820844896, "grad_norm": 3.580594301223755, "learning_rate": 3.4658426589071637e-06, "loss": 0.834, "step": 7930 }, { "epoch": 0.6113937711995067, "grad_norm": 3.637439012527466, "learning_rate": 3.4646544797683946e-06, "loss": 0.9892, "step": 7931 }, { "epoch": 0.6114708603145236, "grad_norm": 3.7209787368774414, "learning_rate": 3.4634663963504654e-06, "loss": 0.8565, "step": 7932 }, { "epoch": 0.6115479494295405, "grad_norm": 3.676300525665283, "learning_rate": 3.4622784087274487e-06, "loss": 1.0424, "step": 7933 }, { "epoch": 0.6116250385445575, "grad_norm": 4.965175151824951, "learning_rate": 3.46109051697341e-06, "loss": 0.9192, "step": 7934 }, { "epoch": 0.6117021276595744, "grad_norm": 3.5655887126922607, "learning_rate": 3.459902721162406e-06, "loss": 0.958, "step": 7935 }, { "epoch": 0.6117792167745915, "grad_norm": 3.9590542316436768, "learning_rate": 3.4587150213684917e-06, "loss": 0.9491, "step": 7936 }, { "epoch": 0.6118563058896084, "grad_norm": 3.773998498916626, "learning_rate": 3.4575274176657134e-06, "loss": 0.9361, "step": 7937 }, { "epoch": 0.6119333950046254, "grad_norm": 3.76638126373291, "learning_rate": 3.4563399101281116e-06, "loss": 0.9906, "step": 7938 }, { "epoch": 0.6120104841196423, "grad_norm": 3.668475389480591, "learning_rate": 3.4551524988297224e-06, "loss": 0.782, "step": 7939 }, { "epoch": 0.6120875732346592, "grad_norm": 3.6993038654327393, "learning_rate": 3.453965183844573e-06, "loss": 0.9687, "step": 7940 }, { "epoch": 0.6121646623496763, "grad_norm": 4.174620628356934, "learning_rate": 3.4527779652466876e-06, "loss": 0.9987, "step": 7941 }, { "epoch": 0.6122417514646932, "grad_norm": 3.559088945388794, "learning_rate": 3.451590843110083e-06, "loss": 1.0611, "step": 7942 }, { "epoch": 0.6123188405797102, "grad_norm": 3.860713243484497, "learning_rate": 3.4504038175087697e-06, "loss": 0.9319, "step": 7943 }, { "epoch": 0.6123959296947271, "grad_norm": 3.864760160446167, "learning_rate": 3.449216888516751e-06, "loss": 0.8688, "step": 7944 }, { "epoch": 0.612473018809744, "grad_norm": 3.620938539505005, "learning_rate": 3.448030056208029e-06, "loss": 0.8719, "step": 7945 }, { "epoch": 0.6125501079247611, "grad_norm": 3.6368861198425293, "learning_rate": 3.4468433206565964e-06, "loss": 0.9333, "step": 7946 }, { "epoch": 0.612627197039778, "grad_norm": 3.713548421859741, "learning_rate": 3.445656681936438e-06, "loss": 1.0769, "step": 7947 }, { "epoch": 0.612704286154795, "grad_norm": 3.9157612323760986, "learning_rate": 3.4444701401215343e-06, "loss": 0.8697, "step": 7948 }, { "epoch": 0.6127813752698119, "grad_norm": 3.3834121227264404, "learning_rate": 3.443283695285863e-06, "loss": 0.7926, "step": 7949 }, { "epoch": 0.6128584643848288, "grad_norm": 3.601344108581543, "learning_rate": 3.4420973475033894e-06, "loss": 0.922, "step": 7950 }, { "epoch": 0.6129355534998459, "grad_norm": 3.9263248443603516, "learning_rate": 3.4409110968480773e-06, "loss": 0.8751, "step": 7951 }, { "epoch": 0.6130126426148628, "grad_norm": 3.377624988555908, "learning_rate": 3.439724943393885e-06, "loss": 0.8985, "step": 7952 }, { "epoch": 0.6130897317298798, "grad_norm": 3.6270108222961426, "learning_rate": 3.438538887214761e-06, "loss": 0.965, "step": 7953 }, { "epoch": 0.6131668208448967, "grad_norm": 
3.6317126750946045, "learning_rate": 3.4373529283846498e-06, "loss": 0.8884, "step": 7954 }, { "epoch": 0.6132439099599136, "grad_norm": 3.744412422180176, "learning_rate": 3.4361670669774917e-06, "loss": 0.9421, "step": 7955 }, { "epoch": 0.6133209990749307, "grad_norm": 3.9327218532562256, "learning_rate": 3.4349813030672165e-06, "loss": 0.894, "step": 7956 }, { "epoch": 0.6133980881899476, "grad_norm": 3.5057458877563477, "learning_rate": 3.4337956367277524e-06, "loss": 0.9483, "step": 7957 }, { "epoch": 0.6134751773049646, "grad_norm": 3.859243869781494, "learning_rate": 3.432610068033018e-06, "loss": 1.0009, "step": 7958 }, { "epoch": 0.6135522664199815, "grad_norm": 3.938803195953369, "learning_rate": 3.4314245970569283e-06, "loss": 0.8712, "step": 7959 }, { "epoch": 0.6136293555349984, "grad_norm": 3.743238687515259, "learning_rate": 3.4302392238733916e-06, "loss": 0.9356, "step": 7960 }, { "epoch": 0.6137064446500154, "grad_norm": 3.5383241176605225, "learning_rate": 3.429053948556309e-06, "loss": 1.0357, "step": 7961 }, { "epoch": 0.6137835337650324, "grad_norm": 3.5702669620513916, "learning_rate": 3.4278687711795754e-06, "loss": 0.9484, "step": 7962 }, { "epoch": 0.6138606228800494, "grad_norm": 3.585568904876709, "learning_rate": 3.4266836918170844e-06, "loss": 0.8646, "step": 7963 }, { "epoch": 0.6139377119950663, "grad_norm": 3.544301748275757, "learning_rate": 3.425498710542714e-06, "loss": 0.9274, "step": 7964 }, { "epoch": 0.6140148011100832, "grad_norm": 3.528738498687744, "learning_rate": 3.424313827430347e-06, "loss": 0.8791, "step": 7965 }, { "epoch": 0.6140918902251002, "grad_norm": 3.8694934844970703, "learning_rate": 3.42312904255385e-06, "loss": 0.9614, "step": 7966 }, { "epoch": 0.6141689793401172, "grad_norm": 3.590520143508911, "learning_rate": 3.4219443559870906e-06, "loss": 0.9202, "step": 7967 }, { "epoch": 0.6142460684551342, "grad_norm": 3.699113607406616, "learning_rate": 3.4207597678039293e-06, "loss": 0.9423, "step": 7968 }, { "epoch": 0.6143231575701511, "grad_norm": 3.463752508163452, "learning_rate": 3.4195752780782156e-06, "loss": 0.9054, "step": 7969 }, { "epoch": 0.614400246685168, "grad_norm": 3.7661490440368652, "learning_rate": 3.4183908868837988e-06, "loss": 0.8484, "step": 7970 }, { "epoch": 0.614477335800185, "grad_norm": 3.7998509407043457, "learning_rate": 3.4172065942945194e-06, "loss": 0.9527, "step": 7971 }, { "epoch": 0.614554424915202, "grad_norm": 3.6133172512054443, "learning_rate": 3.4160224003842103e-06, "loss": 0.9766, "step": 7972 }, { "epoch": 0.614631514030219, "grad_norm": 3.8746337890625, "learning_rate": 3.414838305226701e-06, "loss": 0.9468, "step": 7973 }, { "epoch": 0.6147086031452359, "grad_norm": 3.8318541049957275, "learning_rate": 3.4136543088958147e-06, "loss": 0.9815, "step": 7974 }, { "epoch": 0.6147856922602528, "grad_norm": 3.35244083404541, "learning_rate": 3.4124704114653653e-06, "loss": 0.83, "step": 7975 }, { "epoch": 0.6148627813752698, "grad_norm": 3.994019031524658, "learning_rate": 3.4112866130091648e-06, "loss": 0.915, "step": 7976 }, { "epoch": 0.6149398704902868, "grad_norm": 3.6324615478515625, "learning_rate": 3.4101029136010144e-06, "loss": 0.8744, "step": 7977 }, { "epoch": 0.6150169596053038, "grad_norm": 3.4820597171783447, "learning_rate": 3.4089193133147136e-06, "loss": 0.8885, "step": 7978 }, { "epoch": 0.6150940487203207, "grad_norm": 3.644369602203369, "learning_rate": 3.4077358122240532e-06, "loss": 0.996, "step": 7979 }, { "epoch": 0.6151711378353376, "grad_norm": 3.64266300201416, 
"learning_rate": 3.4065524104028185e-06, "loss": 0.9692, "step": 7980 }, { "epoch": 0.6152482269503546, "grad_norm": 3.786377191543579, "learning_rate": 3.405369107924788e-06, "loss": 0.9424, "step": 7981 }, { "epoch": 0.6153253160653716, "grad_norm": 3.296741008758545, "learning_rate": 3.404185904863736e-06, "loss": 0.8991, "step": 7982 }, { "epoch": 0.6154024051803886, "grad_norm": 3.8608269691467285, "learning_rate": 3.4030028012934264e-06, "loss": 0.9511, "step": 7983 }, { "epoch": 0.6154794942954055, "grad_norm": 3.641268730163574, "learning_rate": 3.401819797287621e-06, "loss": 0.9326, "step": 7984 }, { "epoch": 0.6155565834104224, "grad_norm": 3.7189748287200928, "learning_rate": 3.400636892920076e-06, "loss": 0.901, "step": 7985 }, { "epoch": 0.6156336725254394, "grad_norm": 3.6073148250579834, "learning_rate": 3.3994540882645353e-06, "loss": 0.9221, "step": 7986 }, { "epoch": 0.6157107616404563, "grad_norm": 3.328263282775879, "learning_rate": 3.3982713833947447e-06, "loss": 0.8374, "step": 7987 }, { "epoch": 0.6157878507554734, "grad_norm": 3.7613234519958496, "learning_rate": 3.397088778384434e-06, "loss": 0.8323, "step": 7988 }, { "epoch": 0.6158649398704903, "grad_norm": 3.8894762992858887, "learning_rate": 3.39590627330734e-06, "loss": 0.9505, "step": 7989 }, { "epoch": 0.6159420289855072, "grad_norm": 3.8909053802490234, "learning_rate": 3.3947238682371803e-06, "loss": 0.9096, "step": 7990 }, { "epoch": 0.6160191181005242, "grad_norm": 3.90527081489563, "learning_rate": 3.3935415632476736e-06, "loss": 0.9503, "step": 7991 }, { "epoch": 0.6160962072155411, "grad_norm": 3.739124298095703, "learning_rate": 3.392359358412532e-06, "loss": 0.9485, "step": 7992 }, { "epoch": 0.6161732963305582, "grad_norm": 3.6810646057128906, "learning_rate": 3.3911772538054565e-06, "loss": 0.9624, "step": 7993 }, { "epoch": 0.6162503854455751, "grad_norm": 3.561396598815918, "learning_rate": 3.3899952495001486e-06, "loss": 0.9049, "step": 7994 }, { "epoch": 0.616327474560592, "grad_norm": 4.142645835876465, "learning_rate": 3.3888133455702964e-06, "loss": 0.9669, "step": 7995 }, { "epoch": 0.616404563675609, "grad_norm": 3.6202030181884766, "learning_rate": 3.387631542089589e-06, "loss": 0.9981, "step": 7996 }, { "epoch": 0.616481652790626, "grad_norm": 3.5489907264709473, "learning_rate": 3.3864498391317047e-06, "loss": 0.9013, "step": 7997 }, { "epoch": 0.616558741905643, "grad_norm": 3.5619547367095947, "learning_rate": 3.385268236770315e-06, "loss": 0.8911, "step": 7998 }, { "epoch": 0.6166358310206599, "grad_norm": 3.5192930698394775, "learning_rate": 3.3840867350790872e-06, "loss": 0.9838, "step": 7999 }, { "epoch": 0.6167129201356768, "grad_norm": 3.7306177616119385, "learning_rate": 3.3829053341316843e-06, "loss": 0.9779, "step": 8000 }, { "epoch": 0.6167900092506938, "grad_norm": 4.072969436645508, "learning_rate": 3.3817240340017575e-06, "loss": 0.9314, "step": 8001 }, { "epoch": 0.6168670983657107, "grad_norm": 4.617973804473877, "learning_rate": 3.3805428347629554e-06, "loss": 0.9557, "step": 8002 }, { "epoch": 0.6169441874807278, "grad_norm": 3.65148663520813, "learning_rate": 3.379361736488922e-06, "loss": 0.9209, "step": 8003 }, { "epoch": 0.6170212765957447, "grad_norm": 3.826519727706909, "learning_rate": 3.3781807392532893e-06, "loss": 1.0016, "step": 8004 }, { "epoch": 0.6170983657107616, "grad_norm": 3.791816473007202, "learning_rate": 3.376999843129689e-06, "loss": 0.8873, "step": 8005 }, { "epoch": 0.6171754548257786, "grad_norm": 3.338088274002075, "learning_rate": 
3.375819048191742e-06, "loss": 0.9135, "step": 8006 }, { "epoch": 0.6172525439407955, "grad_norm": 3.48136043548584, "learning_rate": 3.3746383545130646e-06, "loss": 0.8923, "step": 8007 }, { "epoch": 0.6173296330558126, "grad_norm": 3.3619744777679443, "learning_rate": 3.37345776216727e-06, "loss": 0.9164, "step": 8008 }, { "epoch": 0.6174067221708295, "grad_norm": 3.594543218612671, "learning_rate": 3.3722772712279583e-06, "loss": 0.9498, "step": 8009 }, { "epoch": 0.6174838112858464, "grad_norm": 3.30615496635437, "learning_rate": 3.3710968817687284e-06, "loss": 0.8533, "step": 8010 }, { "epoch": 0.6175609004008634, "grad_norm": 3.645437479019165, "learning_rate": 3.369916593863173e-06, "loss": 0.9808, "step": 8011 }, { "epoch": 0.6176379895158803, "grad_norm": 3.909817695617676, "learning_rate": 3.3687364075848738e-06, "loss": 1.0065, "step": 8012 }, { "epoch": 0.6177150786308974, "grad_norm": 4.033278465270996, "learning_rate": 3.3675563230074126e-06, "loss": 0.9278, "step": 8013 }, { "epoch": 0.6177921677459143, "grad_norm": 3.6392765045166016, "learning_rate": 3.366376340204359e-06, "loss": 0.8868, "step": 8014 }, { "epoch": 0.6178692568609312, "grad_norm": 3.7995078563690186, "learning_rate": 3.3651964592492805e-06, "loss": 1.0029, "step": 8015 }, { "epoch": 0.6179463459759482, "grad_norm": 4.048414707183838, "learning_rate": 3.3640166802157357e-06, "loss": 0.9189, "step": 8016 }, { "epoch": 0.6180234350909651, "grad_norm": 3.517993927001953, "learning_rate": 3.362837003177278e-06, "loss": 0.9468, "step": 8017 }, { "epoch": 0.6181005242059822, "grad_norm": 3.915440559387207, "learning_rate": 3.3616574282074533e-06, "loss": 0.9799, "step": 8018 }, { "epoch": 0.6181776133209991, "grad_norm": 3.5532066822052, "learning_rate": 3.360477955379804e-06, "loss": 0.8911, "step": 8019 }, { "epoch": 0.618254702436016, "grad_norm": 3.4006614685058594, "learning_rate": 3.3592985847678617e-06, "loss": 0.9145, "step": 8020 }, { "epoch": 0.618331791551033, "grad_norm": 3.6829326152801514, "learning_rate": 3.3581193164451555e-06, "loss": 1.0141, "step": 8021 }, { "epoch": 0.6184088806660499, "grad_norm": 3.5860280990600586, "learning_rate": 3.3569401504852073e-06, "loss": 0.8548, "step": 8022 }, { "epoch": 0.618485969781067, "grad_norm": 3.718271255493164, "learning_rate": 3.35576108696153e-06, "loss": 0.9893, "step": 8023 }, { "epoch": 0.6185630588960839, "grad_norm": 3.679704189300537, "learning_rate": 3.354582125947634e-06, "loss": 0.9285, "step": 8024 }, { "epoch": 0.6186401480111008, "grad_norm": 3.5176236629486084, "learning_rate": 3.3534032675170205e-06, "loss": 0.9224, "step": 8025 }, { "epoch": 0.6187172371261178, "grad_norm": 3.7090749740600586, "learning_rate": 3.3522245117431845e-06, "loss": 0.8834, "step": 8026 }, { "epoch": 0.6187943262411347, "grad_norm": 3.7500243186950684, "learning_rate": 3.3510458586996173e-06, "loss": 1.0048, "step": 8027 }, { "epoch": 0.6188714153561518, "grad_norm": 3.5407137870788574, "learning_rate": 3.3498673084598e-06, "loss": 1.0076, "step": 8028 }, { "epoch": 0.6189485044711687, "grad_norm": 3.900679588317871, "learning_rate": 3.348688861097209e-06, "loss": 0.9704, "step": 8029 }, { "epoch": 0.6190255935861856, "grad_norm": 3.705470323562622, "learning_rate": 3.347510516685317e-06, "loss": 0.9472, "step": 8030 }, { "epoch": 0.6191026827012026, "grad_norm": 3.6836581230163574, "learning_rate": 3.3463322752975846e-06, "loss": 0.8369, "step": 8031 }, { "epoch": 0.6191797718162195, "grad_norm": 3.5624961853027344, "learning_rate": 3.34515413700747e-06, 
"loss": 0.9787, "step": 8032 }, { "epoch": 0.6192568609312366, "grad_norm": 3.8100168704986572, "learning_rate": 3.3439761018884233e-06, "loss": 1.0565, "step": 8033 }, { "epoch": 0.6193339500462535, "grad_norm": 3.6078031063079834, "learning_rate": 3.342798170013892e-06, "loss": 0.8859, "step": 8034 }, { "epoch": 0.6194110391612704, "grad_norm": 3.7805142402648926, "learning_rate": 3.3416203414573113e-06, "loss": 0.9629, "step": 8035 }, { "epoch": 0.6194881282762874, "grad_norm": 3.473383903503418, "learning_rate": 3.3404426162921123e-06, "loss": 0.8921, "step": 8036 }, { "epoch": 0.6195652173913043, "grad_norm": 3.560927629470825, "learning_rate": 3.3392649945917235e-06, "loss": 0.8671, "step": 8037 }, { "epoch": 0.6196423065063213, "grad_norm": 3.80720591545105, "learning_rate": 3.338087476429559e-06, "loss": 0.9247, "step": 8038 }, { "epoch": 0.6197193956213383, "grad_norm": 3.5851659774780273, "learning_rate": 3.3369100618790336e-06, "loss": 0.9808, "step": 8039 }, { "epoch": 0.6197964847363552, "grad_norm": 3.476499319076538, "learning_rate": 3.335732751013553e-06, "loss": 0.8289, "step": 8040 }, { "epoch": 0.6198735738513722, "grad_norm": 3.651381731033325, "learning_rate": 3.334555543906516e-06, "loss": 0.8949, "step": 8041 }, { "epoch": 0.6199506629663891, "grad_norm": 3.6143252849578857, "learning_rate": 3.333378440631315e-06, "loss": 0.8938, "step": 8042 }, { "epoch": 0.6200277520814061, "grad_norm": 3.81123685836792, "learning_rate": 3.3322014412613364e-06, "loss": 0.8147, "step": 8043 }, { "epoch": 0.6201048411964231, "grad_norm": 3.5519094467163086, "learning_rate": 3.3310245458699595e-06, "loss": 0.8491, "step": 8044 }, { "epoch": 0.62018193031144, "grad_norm": 3.531566619873047, "learning_rate": 3.3298477545305595e-06, "loss": 0.8893, "step": 8045 }, { "epoch": 0.620259019426457, "grad_norm": 3.8642385005950928, "learning_rate": 3.3286710673165012e-06, "loss": 0.9373, "step": 8046 }, { "epoch": 0.6203361085414739, "grad_norm": 4.416684627532959, "learning_rate": 3.3274944843011463e-06, "loss": 1.0649, "step": 8047 }, { "epoch": 0.620413197656491, "grad_norm": 3.7867462635040283, "learning_rate": 3.3263180055578493e-06, "loss": 0.9491, "step": 8048 }, { "epoch": 0.6204902867715079, "grad_norm": 3.4176316261291504, "learning_rate": 3.325141631159954e-06, "loss": 0.9123, "step": 8049 }, { "epoch": 0.6205673758865248, "grad_norm": 3.9313580989837646, "learning_rate": 3.3239653611808054e-06, "loss": 0.938, "step": 8050 }, { "epoch": 0.6206444650015418, "grad_norm": 3.718086004257202, "learning_rate": 3.3227891956937366e-06, "loss": 0.9519, "step": 8051 }, { "epoch": 0.6207215541165587, "grad_norm": 3.9227771759033203, "learning_rate": 3.321613134772073e-06, "loss": 1.0329, "step": 8052 }, { "epoch": 0.6207986432315757, "grad_norm": 3.459362506866455, "learning_rate": 3.32043717848914e-06, "loss": 0.92, "step": 8053 }, { "epoch": 0.6208757323465927, "grad_norm": 3.933849811553955, "learning_rate": 3.3192613269182483e-06, "loss": 0.9814, "step": 8054 }, { "epoch": 0.6209528214616096, "grad_norm": 4.124648094177246, "learning_rate": 3.3180855801327083e-06, "loss": 0.979, "step": 8055 }, { "epoch": 0.6210299105766266, "grad_norm": 3.7071919441223145, "learning_rate": 3.3169099382058233e-06, "loss": 0.9179, "step": 8056 }, { "epoch": 0.6211069996916435, "grad_norm": 3.726698398590088, "learning_rate": 3.315734401210885e-06, "loss": 0.9425, "step": 8057 }, { "epoch": 0.6211840888066605, "grad_norm": 3.652276039123535, "learning_rate": 3.3145589692211837e-06, "loss": 0.9226, "step": 
8058 }, { "epoch": 0.6212611779216775, "grad_norm": 3.5816662311553955, "learning_rate": 3.313383642310003e-06, "loss": 0.7326, "step": 8059 }, { "epoch": 0.6213382670366944, "grad_norm": 3.948348045349121, "learning_rate": 3.3122084205506153e-06, "loss": 0.9797, "step": 8060 }, { "epoch": 0.6214153561517114, "grad_norm": 3.8742499351501465, "learning_rate": 3.3110333040162934e-06, "loss": 1.0117, "step": 8061 }, { "epoch": 0.6214924452667283, "grad_norm": 3.3892946243286133, "learning_rate": 3.309858292780296e-06, "loss": 0.8751, "step": 8062 }, { "epoch": 0.6215695343817453, "grad_norm": 3.410844087600708, "learning_rate": 3.308683386915881e-06, "loss": 0.8663, "step": 8063 }, { "epoch": 0.6216466234967623, "grad_norm": 3.326235055923462, "learning_rate": 3.3075085864962975e-06, "loss": 0.8682, "step": 8064 }, { "epoch": 0.6217237126117792, "grad_norm": 4.0448079109191895, "learning_rate": 3.306333891594787e-06, "loss": 0.8579, "step": 8065 }, { "epoch": 0.6218008017267962, "grad_norm": 3.6627490520477295, "learning_rate": 3.3051593022845867e-06, "loss": 0.9246, "step": 8066 }, { "epoch": 0.6218778908418131, "grad_norm": 3.7059576511383057, "learning_rate": 3.3039848186389278e-06, "loss": 0.7995, "step": 8067 }, { "epoch": 0.6219549799568301, "grad_norm": 3.518906831741333, "learning_rate": 3.3028104407310302e-06, "loss": 0.9744, "step": 8068 }, { "epoch": 0.622032069071847, "grad_norm": 3.5456724166870117, "learning_rate": 3.3016361686341115e-06, "loss": 0.9983, "step": 8069 }, { "epoch": 0.622109158186864, "grad_norm": 4.1023101806640625, "learning_rate": 3.3004620024213824e-06, "loss": 0.9888, "step": 8070 }, { "epoch": 0.622186247301881, "grad_norm": 3.6297264099121094, "learning_rate": 3.2992879421660447e-06, "loss": 0.9784, "step": 8071 }, { "epoch": 0.6222633364168979, "grad_norm": 3.9697625637054443, "learning_rate": 3.2981139879412965e-06, "loss": 1.038, "step": 8072 }, { "epoch": 0.6223404255319149, "grad_norm": 3.5402143001556396, "learning_rate": 3.296940139820326e-06, "loss": 0.8862, "step": 8073 }, { "epoch": 0.6224175146469318, "grad_norm": 3.9144885540008545, "learning_rate": 3.2957663978763172e-06, "loss": 0.9882, "step": 8074 }, { "epoch": 0.6224946037619488, "grad_norm": 3.5432751178741455, "learning_rate": 3.294592762182448e-06, "loss": 0.9141, "step": 8075 }, { "epoch": 0.6225716928769658, "grad_norm": 3.5189805030822754, "learning_rate": 3.2934192328118866e-06, "loss": 0.7741, "step": 8076 }, { "epoch": 0.6226487819919827, "grad_norm": 3.9740869998931885, "learning_rate": 3.292245809837796e-06, "loss": 0.9351, "step": 8077 }, { "epoch": 0.6227258711069997, "grad_norm": 3.5222132205963135, "learning_rate": 3.2910724933333365e-06, "loss": 0.9749, "step": 8078 }, { "epoch": 0.6228029602220166, "grad_norm": 4.026155948638916, "learning_rate": 3.289899283371657e-06, "loss": 0.9626, "step": 8079 }, { "epoch": 0.6228800493370336, "grad_norm": 3.8821022510528564, "learning_rate": 3.2887261800258985e-06, "loss": 1.0371, "step": 8080 }, { "epoch": 0.6229571384520506, "grad_norm": 3.676262855529785, "learning_rate": 3.287553183369201e-06, "loss": 1.0328, "step": 8081 }, { "epoch": 0.6230342275670675, "grad_norm": 3.552816152572632, "learning_rate": 3.2863802934746938e-06, "loss": 0.8604, "step": 8082 }, { "epoch": 0.6231113166820845, "grad_norm": 4.076422214508057, "learning_rate": 3.285207510415499e-06, "loss": 0.8237, "step": 8083 }, { "epoch": 0.6231884057971014, "grad_norm": 3.790148973464966, "learning_rate": 3.2840348342647357e-06, "loss": 0.8949, "step": 8084 }, { 
"epoch": 0.6232654949121184, "grad_norm": 3.885585308074951, "learning_rate": 3.2828622650955133e-06, "loss": 0.9832, "step": 8085 }, { "epoch": 0.6233425840271354, "grad_norm": 4.014956474304199, "learning_rate": 3.281689802980934e-06, "loss": 0.9888, "step": 8086 }, { "epoch": 0.6234196731421523, "grad_norm": 3.65321946144104, "learning_rate": 3.280517447994097e-06, "loss": 0.9624, "step": 8087 }, { "epoch": 0.6234967622571693, "grad_norm": 3.9095733165740967, "learning_rate": 3.279345200208093e-06, "loss": 0.8794, "step": 8088 }, { "epoch": 0.6235738513721862, "grad_norm": 3.463289737701416, "learning_rate": 3.2781730596960027e-06, "loss": 0.9902, "step": 8089 }, { "epoch": 0.6236509404872032, "grad_norm": 4.12209939956665, "learning_rate": 3.277001026530906e-06, "loss": 0.8898, "step": 8090 }, { "epoch": 0.6237280296022202, "grad_norm": 3.618441104888916, "learning_rate": 3.27582910078587e-06, "loss": 0.9217, "step": 8091 }, { "epoch": 0.6238051187172371, "grad_norm": 3.7258079051971436, "learning_rate": 3.2746572825339607e-06, "loss": 0.9826, "step": 8092 }, { "epoch": 0.6238822078322541, "grad_norm": 3.7279441356658936, "learning_rate": 3.273485571848234e-06, "loss": 0.9179, "step": 8093 }, { "epoch": 0.623959296947271, "grad_norm": 3.8348352909088135, "learning_rate": 3.27231396880174e-06, "loss": 1.006, "step": 8094 }, { "epoch": 0.624036386062288, "grad_norm": 3.53607177734375, "learning_rate": 3.2711424734675212e-06, "loss": 0.8691, "step": 8095 }, { "epoch": 0.624113475177305, "grad_norm": 3.5375266075134277, "learning_rate": 3.2699710859186175e-06, "loss": 0.9688, "step": 8096 }, { "epoch": 0.6241905642923219, "grad_norm": 4.027289390563965, "learning_rate": 3.2687998062280546e-06, "loss": 1.0257, "step": 8097 }, { "epoch": 0.6242676534073389, "grad_norm": 3.652118682861328, "learning_rate": 3.2676286344688575e-06, "loss": 0.923, "step": 8098 }, { "epoch": 0.6243447425223558, "grad_norm": 4.014166355133057, "learning_rate": 3.2664575707140446e-06, "loss": 0.951, "step": 8099 }, { "epoch": 0.6244218316373727, "grad_norm": 3.562201738357544, "learning_rate": 3.265286615036622e-06, "loss": 0.9807, "step": 8100 }, { "epoch": 0.6244989207523898, "grad_norm": 3.267332077026367, "learning_rate": 3.2641157675095965e-06, "loss": 0.9209, "step": 8101 }, { "epoch": 0.6245760098674067, "grad_norm": 3.832848310470581, "learning_rate": 3.2629450282059603e-06, "loss": 0.8897, "step": 8102 }, { "epoch": 0.6246530989824237, "grad_norm": 3.6828267574310303, "learning_rate": 3.2617743971987063e-06, "loss": 0.8706, "step": 8103 }, { "epoch": 0.6247301880974406, "grad_norm": 3.771867275238037, "learning_rate": 3.260603874560817e-06, "loss": 0.9786, "step": 8104 }, { "epoch": 0.6248072772124575, "grad_norm": 3.621521234512329, "learning_rate": 3.2594334603652668e-06, "loss": 0.9209, "step": 8105 }, { "epoch": 0.6248843663274746, "grad_norm": 3.851837635040283, "learning_rate": 3.2582631546850254e-06, "loss": 0.893, "step": 8106 }, { "epoch": 0.6249614554424915, "grad_norm": 4.190458297729492, "learning_rate": 3.2570929575930563e-06, "loss": 0.9945, "step": 8107 }, { "epoch": 0.6250385445575085, "grad_norm": 3.653280019760132, "learning_rate": 3.2559228691623134e-06, "loss": 0.9732, "step": 8108 }, { "epoch": 0.6251156336725254, "grad_norm": 3.965890645980835, "learning_rate": 3.254752889465749e-06, "loss": 0.9156, "step": 8109 }, { "epoch": 0.6251927227875425, "grad_norm": 3.464805841445923, "learning_rate": 3.2535830185763006e-06, "loss": 0.7856, "step": 8110 }, { "epoch": 0.6252698119025594, 
"grad_norm": 3.8827240467071533, "learning_rate": 3.252413256566907e-06, "loss": 0.9146, "step": 8111 }, { "epoch": 0.6253469010175763, "grad_norm": 3.4386796951293945, "learning_rate": 3.2512436035104968e-06, "loss": 0.9379, "step": 8112 }, { "epoch": 0.6254239901325933, "grad_norm": 3.7330002784729004, "learning_rate": 3.2500740594799895e-06, "loss": 0.942, "step": 8113 }, { "epoch": 0.6255010792476102, "grad_norm": 3.6872072219848633, "learning_rate": 3.2489046245483016e-06, "loss": 0.8975, "step": 8114 }, { "epoch": 0.6255781683626273, "grad_norm": 3.910029888153076, "learning_rate": 3.2477352987883426e-06, "loss": 1.0826, "step": 8115 }, { "epoch": 0.6256552574776442, "grad_norm": 4.114757537841797, "learning_rate": 3.246566082273011e-06, "loss": 0.9402, "step": 8116 }, { "epoch": 0.6257323465926611, "grad_norm": 3.61517071723938, "learning_rate": 3.245396975075203e-06, "loss": 0.9002, "step": 8117 }, { "epoch": 0.6258094357076781, "grad_norm": 3.6216084957122803, "learning_rate": 3.2442279772678076e-06, "loss": 0.9916, "step": 8118 }, { "epoch": 0.625886524822695, "grad_norm": 3.727811336517334, "learning_rate": 3.2430590889237027e-06, "loss": 1.0077, "step": 8119 }, { "epoch": 0.625963613937712, "grad_norm": 3.697965145111084, "learning_rate": 3.241890310115766e-06, "loss": 1.0235, "step": 8120 }, { "epoch": 0.626040703052729, "grad_norm": 3.661677122116089, "learning_rate": 3.2407216409168597e-06, "loss": 0.8098, "step": 8121 }, { "epoch": 0.6261177921677459, "grad_norm": 3.530116558074951, "learning_rate": 3.2395530813998506e-06, "loss": 0.904, "step": 8122 }, { "epoch": 0.6261948812827629, "grad_norm": 3.739652633666992, "learning_rate": 3.2383846316375876e-06, "loss": 0.9422, "step": 8123 }, { "epoch": 0.6262719703977798, "grad_norm": 4.194034576416016, "learning_rate": 3.2372162917029205e-06, "loss": 1.0163, "step": 8124 }, { "epoch": 0.6263490595127968, "grad_norm": 3.3283534049987793, "learning_rate": 3.236048061668688e-06, "loss": 0.7624, "step": 8125 }, { "epoch": 0.6264261486278138, "grad_norm": 3.7306947708129883, "learning_rate": 3.2348799416077225e-06, "loss": 0.9407, "step": 8126 }, { "epoch": 0.6265032377428307, "grad_norm": 3.8820812702178955, "learning_rate": 3.2337119315928513e-06, "loss": 0.9529, "step": 8127 }, { "epoch": 0.6265803268578477, "grad_norm": 3.882148027420044, "learning_rate": 3.2325440316968927e-06, "loss": 0.93, "step": 8128 }, { "epoch": 0.6266574159728646, "grad_norm": 3.439239025115967, "learning_rate": 3.2313762419926597e-06, "loss": 0.9563, "step": 8129 }, { "epoch": 0.6267345050878816, "grad_norm": 3.5904295444488525, "learning_rate": 3.2302085625529596e-06, "loss": 1.0631, "step": 8130 }, { "epoch": 0.6268115942028986, "grad_norm": 3.6750895977020264, "learning_rate": 3.2290409934505884e-06, "loss": 0.9154, "step": 8131 }, { "epoch": 0.6268886833179155, "grad_norm": 3.750882625579834, "learning_rate": 3.227873534758339e-06, "loss": 0.8514, "step": 8132 }, { "epoch": 0.6269657724329325, "grad_norm": 3.87109637260437, "learning_rate": 3.226706186548998e-06, "loss": 0.9765, "step": 8133 }, { "epoch": 0.6270428615479494, "grad_norm": 4.06139612197876, "learning_rate": 3.22553894889534e-06, "loss": 0.9913, "step": 8134 }, { "epoch": 0.6271199506629664, "grad_norm": 3.7100861072540283, "learning_rate": 3.2243718218701393e-06, "loss": 1.0381, "step": 8135 }, { "epoch": 0.6271970397779834, "grad_norm": 3.1716091632843018, "learning_rate": 3.2232048055461608e-06, "loss": 0.8383, "step": 8136 }, { "epoch": 0.6272741288930003, "grad_norm": 
3.715208053588867, "learning_rate": 3.2220378999961588e-06, "loss": 0.943, "step": 8137 }, { "epoch": 0.6273512180080173, "grad_norm": 3.427281618118286, "learning_rate": 3.2208711052928867e-06, "loss": 0.9138, "step": 8138 }, { "epoch": 0.6274283071230342, "grad_norm": 3.5145509243011475, "learning_rate": 3.219704421509085e-06, "loss": 0.8756, "step": 8139 }, { "epoch": 0.6275053962380512, "grad_norm": 4.032465934753418, "learning_rate": 3.218537848717493e-06, "loss": 0.9441, "step": 8140 }, { "epoch": 0.6275824853530682, "grad_norm": 3.699963331222534, "learning_rate": 3.2173713869908406e-06, "loss": 0.9135, "step": 8141 }, { "epoch": 0.6276595744680851, "grad_norm": 3.3463034629821777, "learning_rate": 3.2162050364018484e-06, "loss": 0.8761, "step": 8142 }, { "epoch": 0.6277366635831021, "grad_norm": 3.821621894836426, "learning_rate": 3.215038797023234e-06, "loss": 0.9123, "step": 8143 }, { "epoch": 0.627813752698119, "grad_norm": 3.723275899887085, "learning_rate": 3.2138726689277067e-06, "loss": 0.9333, "step": 8144 }, { "epoch": 0.627890841813136, "grad_norm": 3.7927725315093994, "learning_rate": 3.2127066521879674e-06, "loss": 0.9102, "step": 8145 }, { "epoch": 0.627967930928153, "grad_norm": 3.668217182159424, "learning_rate": 3.211540746876711e-06, "loss": 0.9221, "step": 8146 }, { "epoch": 0.6280450200431699, "grad_norm": 3.8914999961853027, "learning_rate": 3.2103749530666283e-06, "loss": 0.9604, "step": 8147 }, { "epoch": 0.6281221091581869, "grad_norm": 3.947474718093872, "learning_rate": 3.2092092708303973e-06, "loss": 0.9765, "step": 8148 }, { "epoch": 0.6281991982732038, "grad_norm": 3.575840711593628, "learning_rate": 3.2080437002406943e-06, "loss": 0.9536, "step": 8149 }, { "epoch": 0.6282762873882208, "grad_norm": 3.5878658294677734, "learning_rate": 3.206878241370185e-06, "loss": 0.9539, "step": 8150 }, { "epoch": 0.6283533765032377, "grad_norm": 4.496673107147217, "learning_rate": 3.2057128942915306e-06, "loss": 0.9298, "step": 8151 }, { "epoch": 0.6284304656182547, "grad_norm": 3.7084543704986572, "learning_rate": 3.204547659077385e-06, "loss": 0.9288, "step": 8152 }, { "epoch": 0.6285075547332717, "grad_norm": 3.8590118885040283, "learning_rate": 3.2033825358003936e-06, "loss": 0.9057, "step": 8153 }, { "epoch": 0.6285846438482886, "grad_norm": 3.7039713859558105, "learning_rate": 3.2022175245331954e-06, "loss": 0.9582, "step": 8154 }, { "epoch": 0.6286617329633056, "grad_norm": 3.4202165603637695, "learning_rate": 3.2010526253484246e-06, "loss": 0.8993, "step": 8155 }, { "epoch": 0.6287388220783225, "grad_norm": 3.575178861618042, "learning_rate": 3.199887838318705e-06, "loss": 0.8471, "step": 8156 }, { "epoch": 0.6288159111933395, "grad_norm": 3.737819194793701, "learning_rate": 3.1987231635166565e-06, "loss": 0.8481, "step": 8157 }, { "epoch": 0.6288930003083565, "grad_norm": 3.638150930404663, "learning_rate": 3.197558601014889e-06, "loss": 0.9176, "step": 8158 }, { "epoch": 0.6289700894233734, "grad_norm": 3.68680477142334, "learning_rate": 3.1963941508860076e-06, "loss": 0.9781, "step": 8159 }, { "epoch": 0.6290471785383904, "grad_norm": 3.491912841796875, "learning_rate": 3.1952298132026107e-06, "loss": 0.9208, "step": 8160 }, { "epoch": 0.6291242676534073, "grad_norm": 3.932940721511841, "learning_rate": 3.194065588037286e-06, "loss": 0.9005, "step": 8161 }, { "epoch": 0.6292013567684243, "grad_norm": 3.7449350357055664, "learning_rate": 3.1929014754626197e-06, "loss": 1.0379, "step": 8162 }, { "epoch": 0.6292784458834413, "grad_norm": 3.840050220489502, 
"learning_rate": 3.191737475551188e-06, "loss": 0.9366, "step": 8163 }, { "epoch": 0.6293555349984582, "grad_norm": 3.678901195526123, "learning_rate": 3.1905735883755582e-06, "loss": 0.8937, "step": 8164 }, { "epoch": 0.6294326241134752, "grad_norm": 4.070024013519287, "learning_rate": 3.1894098140082943e-06, "loss": 1.0219, "step": 8165 }, { "epoch": 0.6295097132284921, "grad_norm": 3.705054759979248, "learning_rate": 3.18824615252195e-06, "loss": 1.0407, "step": 8166 }, { "epoch": 0.629586802343509, "grad_norm": 3.3765339851379395, "learning_rate": 3.1870826039890766e-06, "loss": 0.7974, "step": 8167 }, { "epoch": 0.6296638914585261, "grad_norm": 3.9743378162384033, "learning_rate": 3.1859191684822132e-06, "loss": 0.9537, "step": 8168 }, { "epoch": 0.629740980573543, "grad_norm": 3.8855860233306885, "learning_rate": 3.1847558460738936e-06, "loss": 0.9006, "step": 8169 }, { "epoch": 0.62981806968856, "grad_norm": 3.418191909790039, "learning_rate": 3.1835926368366465e-06, "loss": 0.8984, "step": 8170 }, { "epoch": 0.6298951588035769, "grad_norm": 3.7673134803771973, "learning_rate": 3.1824295408429907e-06, "loss": 0.9385, "step": 8171 }, { "epoch": 0.6299722479185939, "grad_norm": 3.6283762454986572, "learning_rate": 3.181266558165439e-06, "loss": 0.94, "step": 8172 }, { "epoch": 0.6300493370336109, "grad_norm": 3.8887081146240234, "learning_rate": 3.180103688876499e-06, "loss": 1.0551, "step": 8173 }, { "epoch": 0.6301264261486278, "grad_norm": 4.249144554138184, "learning_rate": 3.1789409330486683e-06, "loss": 1.1189, "step": 8174 }, { "epoch": 0.6302035152636448, "grad_norm": 3.9418768882751465, "learning_rate": 3.1777782907544392e-06, "loss": 0.9109, "step": 8175 }, { "epoch": 0.6302806043786617, "grad_norm": 3.891338348388672, "learning_rate": 3.176615762066295e-06, "loss": 0.9539, "step": 8176 }, { "epoch": 0.6303576934936787, "grad_norm": 3.6028475761413574, "learning_rate": 3.175453347056715e-06, "loss": 0.8756, "step": 8177 }, { "epoch": 0.6304347826086957, "grad_norm": 3.6023495197296143, "learning_rate": 3.17429104579817e-06, "loss": 0.9605, "step": 8178 }, { "epoch": 0.6305118717237126, "grad_norm": 4.147401332855225, "learning_rate": 3.1731288583631214e-06, "loss": 0.9424, "step": 8179 }, { "epoch": 0.6305889608387296, "grad_norm": 3.7113969326019287, "learning_rate": 3.1719667848240276e-06, "loss": 0.9408, "step": 8180 }, { "epoch": 0.6306660499537465, "grad_norm": 3.5659947395324707, "learning_rate": 3.1708048252533376e-06, "loss": 0.9135, "step": 8181 }, { "epoch": 0.6307431390687634, "grad_norm": 3.6994526386260986, "learning_rate": 3.1696429797234918e-06, "loss": 0.8658, "step": 8182 }, { "epoch": 0.6308202281837805, "grad_norm": 3.7129364013671875, "learning_rate": 3.168481248306927e-06, "loss": 0.9827, "step": 8183 }, { "epoch": 0.6308973172987974, "grad_norm": 3.838097095489502, "learning_rate": 3.1673196310760723e-06, "loss": 0.9963, "step": 8184 }, { "epoch": 0.6309744064138144, "grad_norm": 4.0744242668151855, "learning_rate": 3.166158128103345e-06, "loss": 0.9455, "step": 8185 }, { "epoch": 0.6310514955288313, "grad_norm": 3.745927572250366, "learning_rate": 3.164996739461162e-06, "loss": 0.8731, "step": 8186 }, { "epoch": 0.6311285846438482, "grad_norm": 3.8844759464263916, "learning_rate": 3.163835465221927e-06, "loss": 0.8394, "step": 8187 }, { "epoch": 0.6312056737588653, "grad_norm": 3.549527406692505, "learning_rate": 3.162674305458042e-06, "loss": 0.9107, "step": 8188 }, { "epoch": 0.6312827628738822, "grad_norm": 3.6786959171295166, "learning_rate": 
3.1615132602418986e-06, "loss": 0.928, "step": 8189 }, { "epoch": 0.6313598519888992, "grad_norm": 3.693917751312256, "learning_rate": 3.160352329645881e-06, "loss": 1.0224, "step": 8190 }, { "epoch": 0.6314369411039161, "grad_norm": 3.7990047931671143, "learning_rate": 3.159191513742368e-06, "loss": 0.8727, "step": 8191 }, { "epoch": 0.631514030218933, "grad_norm": 3.8932435512542725, "learning_rate": 3.158030812603731e-06, "loss": 0.9425, "step": 8192 }, { "epoch": 0.6315911193339501, "grad_norm": 3.7320001125335693, "learning_rate": 3.156870226302332e-06, "loss": 0.804, "step": 8193 }, { "epoch": 0.631668208448967, "grad_norm": 3.685913562774658, "learning_rate": 3.155709754910529e-06, "loss": 0.9927, "step": 8194 }, { "epoch": 0.631745297563984, "grad_norm": 4.240044593811035, "learning_rate": 3.1545493985006713e-06, "loss": 1.0915, "step": 8195 }, { "epoch": 0.6318223866790009, "grad_norm": 4.133537292480469, "learning_rate": 3.1533891571451002e-06, "loss": 0.9907, "step": 8196 }, { "epoch": 0.6318994757940178, "grad_norm": 3.4915730953216553, "learning_rate": 3.152229030916152e-06, "loss": 0.8439, "step": 8197 }, { "epoch": 0.6319765649090349, "grad_norm": 3.9785425662994385, "learning_rate": 3.1510690198861533e-06, "loss": 0.9674, "step": 8198 }, { "epoch": 0.6320536540240518, "grad_norm": 3.388223886489868, "learning_rate": 3.149909124127425e-06, "loss": 0.8956, "step": 8199 }, { "epoch": 0.6321307431390688, "grad_norm": 3.3853883743286133, "learning_rate": 3.148749343712282e-06, "loss": 1.0348, "step": 8200 }, { "epoch": 0.6322078322540857, "grad_norm": 3.7054595947265625, "learning_rate": 3.1475896787130287e-06, "loss": 0.9161, "step": 8201 }, { "epoch": 0.6322849213691026, "grad_norm": 4.188024997711182, "learning_rate": 3.146430129201965e-06, "loss": 1.0198, "step": 8202 }, { "epoch": 0.6323620104841197, "grad_norm": 3.5970795154571533, "learning_rate": 3.1452706952513836e-06, "loss": 1.0027, "step": 8203 }, { "epoch": 0.6324390995991366, "grad_norm": 4.296309947967529, "learning_rate": 3.144111376933568e-06, "loss": 0.9452, "step": 8204 }, { "epoch": 0.6325161887141536, "grad_norm": 3.8872005939483643, "learning_rate": 3.142952174320797e-06, "loss": 0.985, "step": 8205 }, { "epoch": 0.6325932778291705, "grad_norm": 3.649055004119873, "learning_rate": 3.1417930874853386e-06, "loss": 0.8932, "step": 8206 }, { "epoch": 0.6326703669441874, "grad_norm": 3.9435665607452393, "learning_rate": 3.1406341164994574e-06, "loss": 0.9035, "step": 8207 }, { "epoch": 0.6327474560592045, "grad_norm": 3.8601176738739014, "learning_rate": 3.1394752614354106e-06, "loss": 0.8653, "step": 8208 }, { "epoch": 0.6328245451742214, "grad_norm": 3.7992372512817383, "learning_rate": 3.1383165223654444e-06, "loss": 0.9261, "step": 8209 }, { "epoch": 0.6329016342892384, "grad_norm": 3.477597236633301, "learning_rate": 3.137157899361799e-06, "loss": 0.9783, "step": 8210 }, { "epoch": 0.6329787234042553, "grad_norm": 3.3875021934509277, "learning_rate": 3.1359993924967124e-06, "loss": 0.8615, "step": 8211 }, { "epoch": 0.6330558125192722, "grad_norm": 3.7047359943389893, "learning_rate": 3.1348410018424115e-06, "loss": 1.0487, "step": 8212 }, { "epoch": 0.6331329016342893, "grad_norm": 3.5324010848999023, "learning_rate": 3.1336827274711124e-06, "loss": 0.8742, "step": 8213 }, { "epoch": 0.6332099907493062, "grad_norm": 3.7448694705963135, "learning_rate": 3.1325245694550292e-06, "loss": 0.9452, "step": 8214 }, { "epoch": 0.6332870798643232, "grad_norm": 4.035982608795166, "learning_rate": 
3.1313665278663686e-06, "loss": 1.0678, "step": 8215 }, { "epoch": 0.6333641689793401, "grad_norm": 3.4959230422973633, "learning_rate": 3.130208602777326e-06, "loss": 0.8715, "step": 8216 }, { "epoch": 0.633441258094357, "grad_norm": 4.09905481338501, "learning_rate": 3.1290507942600936e-06, "loss": 0.9712, "step": 8217 }, { "epoch": 0.633518347209374, "grad_norm": 4.000631332397461, "learning_rate": 3.1278931023868543e-06, "loss": 0.8744, "step": 8218 }, { "epoch": 0.633595436324391, "grad_norm": 3.4521965980529785, "learning_rate": 3.126735527229784e-06, "loss": 0.8514, "step": 8219 }, { "epoch": 0.633672525439408, "grad_norm": 3.4077889919281006, "learning_rate": 3.125578068861051e-06, "loss": 0.9584, "step": 8220 }, { "epoch": 0.6337496145544249, "grad_norm": 3.76359224319458, "learning_rate": 3.124420727352819e-06, "loss": 0.8964, "step": 8221 }, { "epoch": 0.6338267036694418, "grad_norm": 3.4744224548339844, "learning_rate": 3.1232635027772397e-06, "loss": 0.9265, "step": 8222 }, { "epoch": 0.6339037927844589, "grad_norm": 3.7400758266448975, "learning_rate": 3.122106395206462e-06, "loss": 0.9967, "step": 8223 }, { "epoch": 0.6339808818994758, "grad_norm": 3.5406832695007324, "learning_rate": 3.1209494047126233e-06, "loss": 0.9603, "step": 8224 }, { "epoch": 0.6340579710144928, "grad_norm": 3.479562520980835, "learning_rate": 3.119792531367858e-06, "loss": 0.8891, "step": 8225 }, { "epoch": 0.6341350601295097, "grad_norm": 3.7099175453186035, "learning_rate": 3.1186357752442915e-06, "loss": 0.9093, "step": 8226 }, { "epoch": 0.6342121492445266, "grad_norm": 4.117136478424072, "learning_rate": 3.1174791364140394e-06, "loss": 0.8646, "step": 8227 }, { "epoch": 0.6342892383595437, "grad_norm": 3.6180074214935303, "learning_rate": 3.1163226149492133e-06, "loss": 0.7977, "step": 8228 }, { "epoch": 0.6343663274745606, "grad_norm": 3.70717191696167, "learning_rate": 3.1151662109219173e-06, "loss": 0.8866, "step": 8229 }, { "epoch": 0.6344434165895776, "grad_norm": 3.557631492614746, "learning_rate": 3.1140099244042454e-06, "loss": 0.8787, "step": 8230 }, { "epoch": 0.6345205057045945, "grad_norm": 3.4483120441436768, "learning_rate": 3.1128537554682868e-06, "loss": 0.8503, "step": 8231 }, { "epoch": 0.6345975948196114, "grad_norm": 3.401564121246338, "learning_rate": 3.111697704186124e-06, "loss": 0.8023, "step": 8232 }, { "epoch": 0.6346746839346284, "grad_norm": 3.990302562713623, "learning_rate": 3.110541770629828e-06, "loss": 0.9897, "step": 8233 }, { "epoch": 0.6347517730496454, "grad_norm": 3.496260166168213, "learning_rate": 3.109385954871469e-06, "loss": 1.0002, "step": 8234 }, { "epoch": 0.6348288621646624, "grad_norm": 3.497117757797241, "learning_rate": 3.108230256983102e-06, "loss": 0.9513, "step": 8235 }, { "epoch": 0.6349059512796793, "grad_norm": 3.9657294750213623, "learning_rate": 3.107074677036781e-06, "loss": 0.96, "step": 8236 }, { "epoch": 0.6349830403946962, "grad_norm": 3.5663013458251953, "learning_rate": 3.1059192151045507e-06, "loss": 0.8036, "step": 8237 }, { "epoch": 0.6350601295097132, "grad_norm": 3.7324275970458984, "learning_rate": 3.104763871258447e-06, "loss": 0.971, "step": 8238 }, { "epoch": 0.6351372186247302, "grad_norm": 4.242516994476318, "learning_rate": 3.1036086455705006e-06, "loss": 1.0418, "step": 8239 }, { "epoch": 0.6352143077397472, "grad_norm": 3.5605618953704834, "learning_rate": 3.102453538112734e-06, "loss": 0.9253, "step": 8240 }, { "epoch": 0.6352913968547641, "grad_norm": 3.748979330062866, "learning_rate": 3.1012985489571613e-06, 
"loss": 0.972, "step": 8241 }, { "epoch": 0.635368485969781, "grad_norm": 3.898329734802246, "learning_rate": 3.10014367817579e-06, "loss": 1.0175, "step": 8242 }, { "epoch": 0.635445575084798, "grad_norm": 3.484175682067871, "learning_rate": 3.098988925840621e-06, "loss": 0.8839, "step": 8243 }, { "epoch": 0.635522664199815, "grad_norm": 3.634960412979126, "learning_rate": 3.097834292023647e-06, "loss": 0.8899, "step": 8244 }, { "epoch": 0.635599753314832, "grad_norm": 3.7136380672454834, "learning_rate": 3.096679776796854e-06, "loss": 0.9849, "step": 8245 }, { "epoch": 0.6356768424298489, "grad_norm": 3.5262043476104736, "learning_rate": 3.0955253802322183e-06, "loss": 0.8174, "step": 8246 }, { "epoch": 0.6357539315448658, "grad_norm": 3.480539321899414, "learning_rate": 3.094371102401712e-06, "loss": 0.8963, "step": 8247 }, { "epoch": 0.6358310206598828, "grad_norm": 3.8594253063201904, "learning_rate": 3.0932169433772986e-06, "loss": 0.9647, "step": 8248 }, { "epoch": 0.6359081097748998, "grad_norm": 3.4248909950256348, "learning_rate": 3.0920629032309323e-06, "loss": 0.8798, "step": 8249 }, { "epoch": 0.6359851988899168, "grad_norm": 3.841808319091797, "learning_rate": 3.090908982034563e-06, "loss": 0.9655, "step": 8250 }, { "epoch": 0.6360622880049337, "grad_norm": 3.5504653453826904, "learning_rate": 3.0897551798601312e-06, "loss": 0.9077, "step": 8251 }, { "epoch": 0.6361393771199506, "grad_norm": 3.5525784492492676, "learning_rate": 3.0886014967795696e-06, "loss": 0.8604, "step": 8252 }, { "epoch": 0.6362164662349676, "grad_norm": 3.50411057472229, "learning_rate": 3.087447932864807e-06, "loss": 0.9307, "step": 8253 }, { "epoch": 0.6362935553499846, "grad_norm": 3.3735544681549072, "learning_rate": 3.086294488187758e-06, "loss": 0.8722, "step": 8254 }, { "epoch": 0.6363706444650016, "grad_norm": 3.8651130199432373, "learning_rate": 3.0851411628203378e-06, "loss": 0.9636, "step": 8255 }, { "epoch": 0.6364477335800185, "grad_norm": 4.263221263885498, "learning_rate": 3.083987956834449e-06, "loss": 0.946, "step": 8256 }, { "epoch": 0.6365248226950354, "grad_norm": 3.6470301151275635, "learning_rate": 3.082834870301987e-06, "loss": 0.9204, "step": 8257 }, { "epoch": 0.6366019118100524, "grad_norm": 3.6749472618103027, "learning_rate": 3.081681903294843e-06, "loss": 1.0162, "step": 8258 }, { "epoch": 0.6366790009250693, "grad_norm": 3.7581326961517334, "learning_rate": 3.080529055884896e-06, "loss": 0.8681, "step": 8259 }, { "epoch": 0.6367560900400864, "grad_norm": 3.6538708209991455, "learning_rate": 3.0793763281440225e-06, "loss": 0.9529, "step": 8260 }, { "epoch": 0.6368331791551033, "grad_norm": 3.6864404678344727, "learning_rate": 3.0782237201440863e-06, "loss": 1.0218, "step": 8261 }, { "epoch": 0.6369102682701202, "grad_norm": 3.453056573867798, "learning_rate": 3.077071231956948e-06, "loss": 0.8975, "step": 8262 }, { "epoch": 0.6369873573851372, "grad_norm": 3.943230390548706, "learning_rate": 3.07591886365446e-06, "loss": 0.9366, "step": 8263 }, { "epoch": 0.6370644465001541, "grad_norm": 3.7501060962677, "learning_rate": 3.0747666153084656e-06, "loss": 0.9883, "step": 8264 }, { "epoch": 0.6371415356151712, "grad_norm": 3.9778292179107666, "learning_rate": 3.0736144869908015e-06, "loss": 0.9615, "step": 8265 }, { "epoch": 0.6372186247301881, "grad_norm": 3.7227420806884766, "learning_rate": 3.072462478773298e-06, "loss": 0.9409, "step": 8266 }, { "epoch": 0.637295713845205, "grad_norm": 3.480952501296997, "learning_rate": 3.071310590727775e-06, "loss": 0.8985, "step": 8267 }, 
{ "epoch": 0.637372802960222, "grad_norm": 3.7210822105407715, "learning_rate": 3.0701588229260478e-06, "loss": 0.93, "step": 8268 }, { "epoch": 0.6374498920752389, "grad_norm": 3.6173181533813477, "learning_rate": 3.0690071754399236e-06, "loss": 0.9362, "step": 8269 }, { "epoch": 0.637526981190256, "grad_norm": 3.7527718544006348, "learning_rate": 3.0678556483412005e-06, "loss": 0.9783, "step": 8270 }, { "epoch": 0.6376040703052729, "grad_norm": 3.4101197719573975, "learning_rate": 3.066704241701672e-06, "loss": 0.8431, "step": 8271 }, { "epoch": 0.6376811594202898, "grad_norm": 3.5629935264587402, "learning_rate": 3.06555295559312e-06, "loss": 0.9556, "step": 8272 }, { "epoch": 0.6377582485353068, "grad_norm": 3.466757297515869, "learning_rate": 3.0644017900873225e-06, "loss": 0.7801, "step": 8273 }, { "epoch": 0.6378353376503237, "grad_norm": 3.4312379360198975, "learning_rate": 3.06325074525605e-06, "loss": 0.8572, "step": 8274 }, { "epoch": 0.6379124267653408, "grad_norm": 3.8068385124206543, "learning_rate": 3.0620998211710617e-06, "loss": 0.9986, "step": 8275 }, { "epoch": 0.6379895158803577, "grad_norm": 3.604086399078369, "learning_rate": 3.0609490179041124e-06, "loss": 0.9039, "step": 8276 }, { "epoch": 0.6380666049953746, "grad_norm": 4.103906631469727, "learning_rate": 3.059798335526951e-06, "loss": 0.9925, "step": 8277 }, { "epoch": 0.6381436941103916, "grad_norm": 3.9479622840881348, "learning_rate": 3.0586477741113134e-06, "loss": 0.931, "step": 8278 }, { "epoch": 0.6382207832254085, "grad_norm": 3.976950168609619, "learning_rate": 3.0574973337289327e-06, "loss": 0.8651, "step": 8279 }, { "epoch": 0.6382978723404256, "grad_norm": 3.6093039512634277, "learning_rate": 3.0563470144515337e-06, "loss": 0.9052, "step": 8280 }, { "epoch": 0.6383749614554425, "grad_norm": 3.744586229324341, "learning_rate": 3.0551968163508303e-06, "loss": 0.9146, "step": 8281 }, { "epoch": 0.6384520505704595, "grad_norm": 3.3914260864257812, "learning_rate": 3.054046739498535e-06, "loss": 0.8381, "step": 8282 }, { "epoch": 0.6385291396854764, "grad_norm": 3.5384232997894287, "learning_rate": 3.052896783966346e-06, "loss": 0.8195, "step": 8283 }, { "epoch": 0.6386062288004933, "grad_norm": 3.407332420349121, "learning_rate": 3.051746949825958e-06, "loss": 0.9074, "step": 8284 }, { "epoch": 0.6386833179155104, "grad_norm": 4.286930084228516, "learning_rate": 3.0505972371490584e-06, "loss": 1.0354, "step": 8285 }, { "epoch": 0.6387604070305273, "grad_norm": 3.8700149059295654, "learning_rate": 3.0494476460073237e-06, "loss": 0.9878, "step": 8286 }, { "epoch": 0.6388374961455443, "grad_norm": 4.059021472930908, "learning_rate": 3.048298176472426e-06, "loss": 0.9244, "step": 8287 }, { "epoch": 0.6389145852605612, "grad_norm": 3.820525646209717, "learning_rate": 3.04714882861603e-06, "loss": 1.104, "step": 8288 }, { "epoch": 0.6389916743755781, "grad_norm": 3.6468265056610107, "learning_rate": 3.04599960250979e-06, "loss": 0.9207, "step": 8289 }, { "epoch": 0.6390687634905952, "grad_norm": 3.7343029975891113, "learning_rate": 3.044850498225354e-06, "loss": 0.8921, "step": 8290 }, { "epoch": 0.6391458526056121, "grad_norm": 3.6490302085876465, "learning_rate": 3.0437015158343654e-06, "loss": 0.9089, "step": 8291 }, { "epoch": 0.6392229417206291, "grad_norm": 3.772552490234375, "learning_rate": 3.0425526554084526e-06, "loss": 0.9522, "step": 8292 }, { "epoch": 0.639300030835646, "grad_norm": 3.4605162143707275, "learning_rate": 3.0414039170192466e-06, "loss": 0.8253, "step": 8293 }, { "epoch": 
0.6393771199506629, "grad_norm": 3.6937575340270996, "learning_rate": 3.0402553007383607e-06, "loss": 0.7973, "step": 8294 }, { "epoch": 0.63945420906568, "grad_norm": 3.3988912105560303, "learning_rate": 3.0391068066374073e-06, "loss": 0.8901, "step": 8295 }, { "epoch": 0.6395312981806969, "grad_norm": 3.475071668624878, "learning_rate": 3.03795843478799e-06, "loss": 0.8047, "step": 8296 }, { "epoch": 0.6396083872957139, "grad_norm": 3.693103075027466, "learning_rate": 3.0368101852617017e-06, "loss": 0.926, "step": 8297 }, { "epoch": 0.6396854764107308, "grad_norm": 3.585578203201294, "learning_rate": 3.03566205813013e-06, "loss": 0.9834, "step": 8298 }, { "epoch": 0.6397625655257477, "grad_norm": 3.4358348846435547, "learning_rate": 3.034514053464854e-06, "loss": 0.8158, "step": 8299 }, { "epoch": 0.6398396546407648, "grad_norm": 3.7729804515838623, "learning_rate": 3.0333661713374506e-06, "loss": 0.8983, "step": 8300 }, { "epoch": 0.6399167437557817, "grad_norm": 3.628103256225586, "learning_rate": 3.0322184118194797e-06, "loss": 0.8904, "step": 8301 }, { "epoch": 0.6399938328707987, "grad_norm": 3.9679834842681885, "learning_rate": 3.0310707749824987e-06, "loss": 0.894, "step": 8302 }, { "epoch": 0.6400709219858156, "grad_norm": 3.610292911529541, "learning_rate": 3.0299232608980587e-06, "loss": 0.9125, "step": 8303 }, { "epoch": 0.6401480111008325, "grad_norm": 3.601590394973755, "learning_rate": 3.0287758696377e-06, "loss": 0.8971, "step": 8304 }, { "epoch": 0.6402251002158496, "grad_norm": 3.709563732147217, "learning_rate": 3.0276286012729563e-06, "loss": 0.9295, "step": 8305 }, { "epoch": 0.6403021893308665, "grad_norm": 3.6046383380889893, "learning_rate": 3.0264814558753543e-06, "loss": 0.9875, "step": 8306 }, { "epoch": 0.6403792784458835, "grad_norm": 3.460169792175293, "learning_rate": 3.0253344335164114e-06, "loss": 0.9901, "step": 8307 }, { "epoch": 0.6404563675609004, "grad_norm": 3.704393148422241, "learning_rate": 3.0241875342676413e-06, "loss": 0.9067, "step": 8308 }, { "epoch": 0.6405334566759173, "grad_norm": 3.8875725269317627, "learning_rate": 3.023040758200544e-06, "loss": 0.9543, "step": 8309 }, { "epoch": 0.6406105457909343, "grad_norm": 3.9328582286834717, "learning_rate": 3.0218941053866167e-06, "loss": 1.0593, "step": 8310 }, { "epoch": 0.6406876349059513, "grad_norm": 4.249375343322754, "learning_rate": 3.020747575897348e-06, "loss": 0.9223, "step": 8311 }, { "epoch": 0.6407647240209683, "grad_norm": 3.789259672164917, "learning_rate": 3.019601169804216e-06, "loss": 0.9254, "step": 8312 }, { "epoch": 0.6408418131359852, "grad_norm": 3.5472848415374756, "learning_rate": 3.0184548871786946e-06, "loss": 0.9175, "step": 8313 }, { "epoch": 0.6409189022510021, "grad_norm": 3.7821688652038574, "learning_rate": 3.0173087280922493e-06, "loss": 0.8734, "step": 8314 }, { "epoch": 0.6409959913660191, "grad_norm": 3.4091010093688965, "learning_rate": 3.0161626926163344e-06, "loss": 0.9446, "step": 8315 }, { "epoch": 0.6410730804810361, "grad_norm": 3.822620391845703, "learning_rate": 3.015016780822402e-06, "loss": 0.9002, "step": 8316 }, { "epoch": 0.6411501695960531, "grad_norm": 3.6239142417907715, "learning_rate": 3.013870992781894e-06, "loss": 0.8363, "step": 8317 }, { "epoch": 0.64122725871107, "grad_norm": 3.812986373901367, "learning_rate": 3.0127253285662415e-06, "loss": 0.9568, "step": 8318 }, { "epoch": 0.6413043478260869, "grad_norm": 3.996857166290283, "learning_rate": 3.0115797882468733e-06, "loss": 0.863, "step": 8319 }, { "epoch": 0.6413814369411039, 
"grad_norm": 3.7889609336853027, "learning_rate": 3.0104343718952065e-06, "loss": 1.0847, "step": 8320 }, { "epoch": 0.6414585260561209, "grad_norm": 3.7491159439086914, "learning_rate": 3.0092890795826524e-06, "loss": 0.8731, "step": 8321 }, { "epoch": 0.6415356151711379, "grad_norm": 3.6667377948760986, "learning_rate": 3.0081439113806145e-06, "loss": 0.8165, "step": 8322 }, { "epoch": 0.6416127042861548, "grad_norm": 4.124176502227783, "learning_rate": 3.0069988673604866e-06, "loss": 1.0136, "step": 8323 }, { "epoch": 0.6416897934011717, "grad_norm": 4.588274955749512, "learning_rate": 3.0058539475936577e-06, "loss": 1.0123, "step": 8324 }, { "epoch": 0.6417668825161887, "grad_norm": 3.610478401184082, "learning_rate": 3.0047091521515083e-06, "loss": 0.868, "step": 8325 }, { "epoch": 0.6418439716312057, "grad_norm": 3.607025384902954, "learning_rate": 3.0035644811054083e-06, "loss": 0.8376, "step": 8326 }, { "epoch": 0.6419210607462227, "grad_norm": 3.4604544639587402, "learning_rate": 3.002419934526723e-06, "loss": 0.898, "step": 8327 }, { "epoch": 0.6419981498612396, "grad_norm": 3.819643497467041, "learning_rate": 3.00127551248681e-06, "loss": 0.9074, "step": 8328 }, { "epoch": 0.6420752389762565, "grad_norm": 3.4127678871154785, "learning_rate": 3.000131215057016e-06, "loss": 0.818, "step": 8329 }, { "epoch": 0.6421523280912735, "grad_norm": 3.94305157661438, "learning_rate": 2.998987042308685e-06, "loss": 1.0063, "step": 8330 }, { "epoch": 0.6422294172062905, "grad_norm": 3.653914213180542, "learning_rate": 2.9978429943131466e-06, "loss": 0.9659, "step": 8331 }, { "epoch": 0.6423065063213075, "grad_norm": 3.779825210571289, "learning_rate": 2.9966990711417283e-06, "loss": 0.875, "step": 8332 }, { "epoch": 0.6423835954363244, "grad_norm": 3.4283056259155273, "learning_rate": 2.9955552728657483e-06, "loss": 0.8167, "step": 8333 }, { "epoch": 0.6424606845513413, "grad_norm": 3.8111422061920166, "learning_rate": 2.994411599556515e-06, "loss": 0.8542, "step": 8334 }, { "epoch": 0.6425377736663583, "grad_norm": 3.65975284576416, "learning_rate": 2.993268051285331e-06, "loss": 0.8386, "step": 8335 }, { "epoch": 0.6426148627813753, "grad_norm": 3.7688958644866943, "learning_rate": 2.9921246281234924e-06, "loss": 0.9334, "step": 8336 }, { "epoch": 0.6426919518963923, "grad_norm": 3.5940749645233154, "learning_rate": 2.990981330142283e-06, "loss": 0.9051, "step": 8337 }, { "epoch": 0.6427690410114092, "grad_norm": 3.8755249977111816, "learning_rate": 2.989838157412982e-06, "loss": 0.9065, "step": 8338 }, { "epoch": 0.6428461301264261, "grad_norm": 4.170990943908691, "learning_rate": 2.9886951100068628e-06, "loss": 1.0524, "step": 8339 }, { "epoch": 0.6429232192414431, "grad_norm": 3.61470890045166, "learning_rate": 2.9875521879951852e-06, "loss": 0.951, "step": 8340 }, { "epoch": 0.64300030835646, "grad_norm": 3.7371344566345215, "learning_rate": 2.9864093914492076e-06, "loss": 0.9648, "step": 8341 }, { "epoch": 0.6430773974714771, "grad_norm": 3.93182635307312, "learning_rate": 2.9852667204401742e-06, "loss": 1.0087, "step": 8342 }, { "epoch": 0.643154486586494, "grad_norm": 3.8426945209503174, "learning_rate": 2.9841241750393257e-06, "loss": 0.9239, "step": 8343 }, { "epoch": 0.6432315757015109, "grad_norm": 3.605802297592163, "learning_rate": 2.9829817553178945e-06, "loss": 0.9312, "step": 8344 }, { "epoch": 0.6433086648165279, "grad_norm": 3.347144603729248, "learning_rate": 2.981839461347105e-06, "loss": 0.8394, "step": 8345 }, { "epoch": 0.6433857539315448, "grad_norm": 
3.462236166000366, "learning_rate": 2.980697293198174e-06, "loss": 0.9387, "step": 8346 }, { "epoch": 0.6434628430465619, "grad_norm": 3.7107419967651367, "learning_rate": 2.9795552509423075e-06, "loss": 0.7882, "step": 8347 }, { "epoch": 0.6435399321615788, "grad_norm": 3.59287166595459, "learning_rate": 2.9784133346507077e-06, "loss": 0.9505, "step": 8348 }, { "epoch": 0.6436170212765957, "grad_norm": 3.7674858570098877, "learning_rate": 2.9772715443945653e-06, "loss": 0.9572, "step": 8349 }, { "epoch": 0.6436941103916127, "grad_norm": 4.125513076782227, "learning_rate": 2.9761298802450667e-06, "loss": 0.9329, "step": 8350 }, { "epoch": 0.6437711995066296, "grad_norm": 3.9114468097686768, "learning_rate": 2.974988342273388e-06, "loss": 0.8998, "step": 8351 }, { "epoch": 0.6438482886216467, "grad_norm": 3.4808576107025146, "learning_rate": 2.9738469305506985e-06, "loss": 0.865, "step": 8352 }, { "epoch": 0.6439253777366636, "grad_norm": 3.3427348136901855, "learning_rate": 2.972705645148159e-06, "loss": 0.9336, "step": 8353 }, { "epoch": 0.6440024668516805, "grad_norm": 3.8578758239746094, "learning_rate": 2.9715644861369235e-06, "loss": 0.9053, "step": 8354 }, { "epoch": 0.6440795559666975, "grad_norm": 3.955080986022949, "learning_rate": 2.970423453588136e-06, "loss": 1.0019, "step": 8355 }, { "epoch": 0.6441566450817144, "grad_norm": 3.818481922149658, "learning_rate": 2.969282547572936e-06, "loss": 0.9418, "step": 8356 }, { "epoch": 0.6442337341967315, "grad_norm": 3.748859405517578, "learning_rate": 2.9681417681624502e-06, "loss": 1.0381, "step": 8357 }, { "epoch": 0.6443108233117484, "grad_norm": 4.283618450164795, "learning_rate": 2.967001115427802e-06, "loss": 1.0513, "step": 8358 }, { "epoch": 0.6443879124267653, "grad_norm": 3.595768690109253, "learning_rate": 2.965860589440106e-06, "loss": 0.8752, "step": 8359 }, { "epoch": 0.6444650015417823, "grad_norm": 3.731633424758911, "learning_rate": 2.964720190270467e-06, "loss": 0.9551, "step": 8360 }, { "epoch": 0.6445420906567992, "grad_norm": 3.895328998565674, "learning_rate": 2.9635799179899822e-06, "loss": 0.8959, "step": 8361 }, { "epoch": 0.6446191797718163, "grad_norm": 3.839773654937744, "learning_rate": 2.962439772669744e-06, "loss": 0.9535, "step": 8362 }, { "epoch": 0.6446962688868332, "grad_norm": 3.9109835624694824, "learning_rate": 2.961299754380832e-06, "loss": 0.8728, "step": 8363 }, { "epoch": 0.6447733580018501, "grad_norm": 3.788618803024292, "learning_rate": 2.960159863194322e-06, "loss": 0.9667, "step": 8364 }, { "epoch": 0.6448504471168671, "grad_norm": 4.191736698150635, "learning_rate": 2.9590200991812804e-06, "loss": 1.0588, "step": 8365 }, { "epoch": 0.644927536231884, "grad_norm": 3.4580607414245605, "learning_rate": 2.9578804624127644e-06, "loss": 0.849, "step": 8366 }, { "epoch": 0.6450046253469011, "grad_norm": 3.6437692642211914, "learning_rate": 2.9567409529598267e-06, "loss": 0.9661, "step": 8367 }, { "epoch": 0.645081714461918, "grad_norm": 3.486619472503662, "learning_rate": 2.9556015708935064e-06, "loss": 0.855, "step": 8368 }, { "epoch": 0.6451588035769349, "grad_norm": 3.5905685424804688, "learning_rate": 2.9544623162848406e-06, "loss": 0.9604, "step": 8369 }, { "epoch": 0.6452358926919519, "grad_norm": 3.5381009578704834, "learning_rate": 2.9533231892048557e-06, "loss": 0.8643, "step": 8370 }, { "epoch": 0.6453129818069688, "grad_norm": 3.4007678031921387, "learning_rate": 2.9521841897245694e-06, "loss": 0.8136, "step": 8371 }, { "epoch": 0.6453900709219859, "grad_norm": 3.962644338607788, 
"learning_rate": 2.9510453179149923e-06, "loss": 0.9401, "step": 8372 }, { "epoch": 0.6454671600370028, "grad_norm": 3.902945041656494, "learning_rate": 2.94990657384713e-06, "loss": 0.8761, "step": 8373 }, { "epoch": 0.6455442491520197, "grad_norm": 3.3986709117889404, "learning_rate": 2.9487679575919733e-06, "loss": 0.9076, "step": 8374 }, { "epoch": 0.6456213382670367, "grad_norm": 3.4570724964141846, "learning_rate": 2.947629469220511e-06, "loss": 0.8969, "step": 8375 }, { "epoch": 0.6456984273820536, "grad_norm": 3.3817062377929688, "learning_rate": 2.9464911088037225e-06, "loss": 0.7891, "step": 8376 }, { "epoch": 0.6457755164970707, "grad_norm": 4.3606696128845215, "learning_rate": 2.945352876412577e-06, "loss": 0.8679, "step": 8377 }, { "epoch": 0.6458526056120876, "grad_norm": 3.8586175441741943, "learning_rate": 2.94421477211804e-06, "loss": 0.9431, "step": 8378 }, { "epoch": 0.6459296947271045, "grad_norm": 3.5619122982025146, "learning_rate": 2.9430767959910633e-06, "loss": 0.8947, "step": 8379 }, { "epoch": 0.6460067838421215, "grad_norm": 3.578460693359375, "learning_rate": 2.941938948102595e-06, "loss": 0.9447, "step": 8380 }, { "epoch": 0.6460838729571384, "grad_norm": 3.719597101211548, "learning_rate": 2.9408012285235753e-06, "loss": 0.9453, "step": 8381 }, { "epoch": 0.6461609620721555, "grad_norm": 3.9067883491516113, "learning_rate": 2.939663637324934e-06, "loss": 0.9575, "step": 8382 }, { "epoch": 0.6462380511871724, "grad_norm": 3.6373391151428223, "learning_rate": 2.9385261745775932e-06, "loss": 0.8788, "step": 8383 }, { "epoch": 0.6463151403021893, "grad_norm": 3.917569160461426, "learning_rate": 2.93738884035247e-06, "loss": 0.9704, "step": 8384 }, { "epoch": 0.6463922294172063, "grad_norm": 3.8104026317596436, "learning_rate": 2.9362516347204695e-06, "loss": 0.9048, "step": 8385 }, { "epoch": 0.6464693185322232, "grad_norm": 3.8119072914123535, "learning_rate": 2.9351145577524908e-06, "loss": 0.9322, "step": 8386 }, { "epoch": 0.6465464076472403, "grad_norm": 3.7479817867279053, "learning_rate": 2.9339776095194245e-06, "loss": 0.9542, "step": 8387 }, { "epoch": 0.6466234967622572, "grad_norm": 3.774230718612671, "learning_rate": 2.9328407900921555e-06, "loss": 0.9974, "step": 8388 }, { "epoch": 0.6467005858772741, "grad_norm": 3.979708671569824, "learning_rate": 2.931704099541557e-06, "loss": 1.0159, "step": 8389 }, { "epoch": 0.6467776749922911, "grad_norm": 3.6764190196990967, "learning_rate": 2.9305675379384945e-06, "loss": 0.9333, "step": 8390 }, { "epoch": 0.646854764107308, "grad_norm": 3.608494997024536, "learning_rate": 2.9294311053538306e-06, "loss": 0.8975, "step": 8391 }, { "epoch": 0.646931853222325, "grad_norm": 3.679772138595581, "learning_rate": 2.9282948018584116e-06, "loss": 0.8043, "step": 8392 }, { "epoch": 0.647008942337342, "grad_norm": 3.3427774906158447, "learning_rate": 2.9271586275230823e-06, "loss": 0.7306, "step": 8393 }, { "epoch": 0.6470860314523589, "grad_norm": 3.9158496856689453, "learning_rate": 2.926022582418678e-06, "loss": 0.7937, "step": 8394 }, { "epoch": 0.6471631205673759, "grad_norm": 3.52773380279541, "learning_rate": 2.9248866666160236e-06, "loss": 0.9314, "step": 8395 }, { "epoch": 0.6472402096823928, "grad_norm": 4.079388618469238, "learning_rate": 2.9237508801859394e-06, "loss": 1.0586, "step": 8396 }, { "epoch": 0.6473172987974098, "grad_norm": 3.9858343601226807, "learning_rate": 2.922615223199233e-06, "loss": 0.9373, "step": 8397 }, { "epoch": 0.6473943879124268, "grad_norm": 3.436227798461914, "learning_rate": 
2.921479695726709e-06, "loss": 0.9468, "step": 8398 }, { "epoch": 0.6474714770274437, "grad_norm": 3.483386278152466, "learning_rate": 2.9203442978391618e-06, "loss": 0.9141, "step": 8399 }, { "epoch": 0.6475485661424607, "grad_norm": 3.5396289825439453, "learning_rate": 2.9192090296073755e-06, "loss": 0.93, "step": 8400 }, { "epoch": 0.6476256552574776, "grad_norm": 3.383061170578003, "learning_rate": 2.91807389110213e-06, "loss": 0.8128, "step": 8401 }, { "epoch": 0.6477027443724946, "grad_norm": 3.429288864135742, "learning_rate": 2.916938882394194e-06, "loss": 0.8793, "step": 8402 }, { "epoch": 0.6477798334875116, "grad_norm": 3.536487340927124, "learning_rate": 2.9158040035543334e-06, "loss": 0.9317, "step": 8403 }, { "epoch": 0.6478569226025285, "grad_norm": 3.7233903408050537, "learning_rate": 2.914669254653296e-06, "loss": 0.8154, "step": 8404 }, { "epoch": 0.6479340117175455, "grad_norm": 3.469984292984009, "learning_rate": 2.9135346357618304e-06, "loss": 0.9265, "step": 8405 }, { "epoch": 0.6480111008325624, "grad_norm": 3.6804935932159424, "learning_rate": 2.912400146950675e-06, "loss": 1.0209, "step": 8406 }, { "epoch": 0.6480881899475794, "grad_norm": 3.8416061401367188, "learning_rate": 2.9112657882905573e-06, "loss": 0.9045, "step": 8407 }, { "epoch": 0.6481652790625964, "grad_norm": 3.884570360183716, "learning_rate": 2.910131559852201e-06, "loss": 0.8565, "step": 8408 }, { "epoch": 0.6482423681776133, "grad_norm": 3.519493341445923, "learning_rate": 2.9089974617063188e-06, "loss": 0.9198, "step": 8409 }, { "epoch": 0.6483194572926303, "grad_norm": 4.712925434112549, "learning_rate": 2.9078634939236146e-06, "loss": 1.0443, "step": 8410 }, { "epoch": 0.6483965464076472, "grad_norm": 6.0556559562683105, "learning_rate": 2.9067296565747856e-06, "loss": 0.8174, "step": 8411 }, { "epoch": 0.6484736355226642, "grad_norm": 3.844569206237793, "learning_rate": 2.905595949730521e-06, "loss": 1.0147, "step": 8412 }, { "epoch": 0.6485507246376812, "grad_norm": 3.9868366718292236, "learning_rate": 2.9044623734615018e-06, "loss": 0.9485, "step": 8413 }, { "epoch": 0.6486278137526981, "grad_norm": 3.9000837802886963, "learning_rate": 2.903328927838403e-06, "loss": 1.0351, "step": 8414 }, { "epoch": 0.6487049028677151, "grad_norm": 3.564354419708252, "learning_rate": 2.902195612931884e-06, "loss": 0.892, "step": 8415 }, { "epoch": 0.648781991982732, "grad_norm": 3.669095277786255, "learning_rate": 2.901062428812604e-06, "loss": 0.9128, "step": 8416 }, { "epoch": 0.648859081097749, "grad_norm": 3.598200798034668, "learning_rate": 2.8999293755512113e-06, "loss": 0.8725, "step": 8417 }, { "epoch": 0.648936170212766, "grad_norm": 3.7269628047943115, "learning_rate": 2.8987964532183454e-06, "loss": 0.8105, "step": 8418 }, { "epoch": 0.6490132593277829, "grad_norm": 3.622880458831787, "learning_rate": 2.897663661884638e-06, "loss": 0.8715, "step": 8419 }, { "epoch": 0.6490903484427999, "grad_norm": 3.9800972938537598, "learning_rate": 2.896531001620715e-06, "loss": 0.9407, "step": 8420 }, { "epoch": 0.6491674375578168, "grad_norm": 4.113578796386719, "learning_rate": 2.8953984724971874e-06, "loss": 0.9153, "step": 8421 }, { "epoch": 0.6492445266728338, "grad_norm": 4.180689811706543, "learning_rate": 2.8942660745846657e-06, "loss": 0.8857, "step": 8422 }, { "epoch": 0.6493216157878507, "grad_norm": 4.276498794555664, "learning_rate": 2.8931338079537487e-06, "loss": 0.9398, "step": 8423 }, { "epoch": 0.6493987049028677, "grad_norm": 3.5343918800354004, "learning_rate": 2.892001672675026e-06, 
"loss": 0.9164, "step": 8424 }, { "epoch": 0.6494757940178847, "grad_norm": 3.7870051860809326, "learning_rate": 2.890869668819084e-06, "loss": 1.0146, "step": 8425 }, { "epoch": 0.6495528831329016, "grad_norm": 3.560620069503784, "learning_rate": 2.8897377964564922e-06, "loss": 0.8416, "step": 8426 }, { "epoch": 0.6496299722479186, "grad_norm": 3.742684841156006, "learning_rate": 2.88860605565782e-06, "loss": 0.9449, "step": 8427 }, { "epoch": 0.6497070613629355, "grad_norm": 3.494896411895752, "learning_rate": 2.887474446493625e-06, "loss": 0.8439, "step": 8428 }, { "epoch": 0.6497841504779525, "grad_norm": 3.8634705543518066, "learning_rate": 2.886342969034457e-06, "loss": 0.9453, "step": 8429 }, { "epoch": 0.6498612395929695, "grad_norm": 3.891278028488159, "learning_rate": 2.8852116233508604e-06, "loss": 1.0619, "step": 8430 }, { "epoch": 0.6499383287079864, "grad_norm": 3.782010793685913, "learning_rate": 2.8840804095133617e-06, "loss": 1.0296, "step": 8431 }, { "epoch": 0.6500154178230034, "grad_norm": 3.684227466583252, "learning_rate": 2.8829493275924935e-06, "loss": 0.9614, "step": 8432 }, { "epoch": 0.6500925069380203, "grad_norm": 3.9234118461608887, "learning_rate": 2.8818183776587694e-06, "loss": 1.0703, "step": 8433 }, { "epoch": 0.6501695960530373, "grad_norm": 3.863373279571533, "learning_rate": 2.8806875597827e-06, "loss": 0.9689, "step": 8434 }, { "epoch": 0.6502466851680543, "grad_norm": 3.6144754886627197, "learning_rate": 2.8795568740347868e-06, "loss": 0.8714, "step": 8435 }, { "epoch": 0.6503237742830712, "grad_norm": 4.281971454620361, "learning_rate": 2.878426320485518e-06, "loss": 0.9231, "step": 8436 }, { "epoch": 0.6504008633980882, "grad_norm": 4.0023040771484375, "learning_rate": 2.87729589920538e-06, "loss": 0.9989, "step": 8437 }, { "epoch": 0.6504779525131051, "grad_norm": 3.5423178672790527, "learning_rate": 2.8761656102648495e-06, "loss": 0.9044, "step": 8438 }, { "epoch": 0.650555041628122, "grad_norm": 3.6947436332702637, "learning_rate": 2.875035453734394e-06, "loss": 0.9198, "step": 8439 }, { "epoch": 0.6506321307431391, "grad_norm": 3.7778401374816895, "learning_rate": 2.8739054296844736e-06, "loss": 0.9065, "step": 8440 }, { "epoch": 0.650709219858156, "grad_norm": 3.7728586196899414, "learning_rate": 2.8727755381855373e-06, "loss": 0.9014, "step": 8441 }, { "epoch": 0.650786308973173, "grad_norm": 3.6431329250335693, "learning_rate": 2.8716457793080277e-06, "loss": 0.9221, "step": 8442 }, { "epoch": 0.6508633980881899, "grad_norm": 3.698190927505493, "learning_rate": 2.8705161531223814e-06, "loss": 0.9884, "step": 8443 }, { "epoch": 0.6509404872032069, "grad_norm": 4.110282897949219, "learning_rate": 2.8693866596990237e-06, "loss": 0.9542, "step": 8444 }, { "epoch": 0.6510175763182239, "grad_norm": 4.268514156341553, "learning_rate": 2.868257299108374e-06, "loss": 1.0497, "step": 8445 }, { "epoch": 0.6510946654332408, "grad_norm": 3.500208616256714, "learning_rate": 2.8671280714208427e-06, "loss": 0.8821, "step": 8446 }, { "epoch": 0.6511717545482578, "grad_norm": 3.6203670501708984, "learning_rate": 2.8659989767068277e-06, "loss": 0.8401, "step": 8447 }, { "epoch": 0.6512488436632747, "grad_norm": 3.4511590003967285, "learning_rate": 2.8648700150367236e-06, "loss": 0.8388, "step": 8448 }, { "epoch": 0.6513259327782918, "grad_norm": 3.800976514816284, "learning_rate": 2.863741186480917e-06, "loss": 0.977, "step": 8449 }, { "epoch": 0.6514030218933087, "grad_norm": 3.96077561378479, "learning_rate": 2.862612491109783e-06, "loss": 1.0604, "step": 
8450 }, { "epoch": 0.6514801110083256, "grad_norm": 3.6005845069885254, "learning_rate": 2.861483928993692e-06, "loss": 1.0056, "step": 8451 }, { "epoch": 0.6515572001233426, "grad_norm": 3.862231492996216, "learning_rate": 2.8603555002030005e-06, "loss": 0.9447, "step": 8452 }, { "epoch": 0.6516342892383595, "grad_norm": 3.786400318145752, "learning_rate": 2.8592272048080623e-06, "loss": 0.9996, "step": 8453 }, { "epoch": 0.6517113783533766, "grad_norm": 4.21661901473999, "learning_rate": 2.8580990428792205e-06, "loss": 0.9962, "step": 8454 }, { "epoch": 0.6517884674683935, "grad_norm": 3.853390693664551, "learning_rate": 2.85697101448681e-06, "loss": 0.9301, "step": 8455 }, { "epoch": 0.6518655565834104, "grad_norm": 3.9790003299713135, "learning_rate": 2.855843119701158e-06, "loss": 0.8526, "step": 8456 }, { "epoch": 0.6519426456984274, "grad_norm": 4.153457164764404, "learning_rate": 2.8547153585925842e-06, "loss": 0.9439, "step": 8457 }, { "epoch": 0.6520197348134443, "grad_norm": 3.5101563930511475, "learning_rate": 2.853587731231395e-06, "loss": 0.8871, "step": 8458 }, { "epoch": 0.6520968239284614, "grad_norm": 3.815025806427002, "learning_rate": 2.8524602376878952e-06, "loss": 0.9554, "step": 8459 }, { "epoch": 0.6521739130434783, "grad_norm": 3.934506893157959, "learning_rate": 2.8513328780323763e-06, "loss": 0.8709, "step": 8460 }, { "epoch": 0.6522510021584952, "grad_norm": 3.9895248413085938, "learning_rate": 2.850205652335124e-06, "loss": 0.976, "step": 8461 }, { "epoch": 0.6523280912735122, "grad_norm": 3.5615084171295166, "learning_rate": 2.8490785606664177e-06, "loss": 0.8848, "step": 8462 }, { "epoch": 0.6524051803885291, "grad_norm": 3.748917579650879, "learning_rate": 2.8479516030965215e-06, "loss": 0.8879, "step": 8463 }, { "epoch": 0.6524822695035462, "grad_norm": 3.557553768157959, "learning_rate": 2.8468247796956967e-06, "loss": 0.8766, "step": 8464 }, { "epoch": 0.6525593586185631, "grad_norm": 3.534771203994751, "learning_rate": 2.845698090534196e-06, "loss": 0.9342, "step": 8465 }, { "epoch": 0.65263644773358, "grad_norm": 3.2332229614257812, "learning_rate": 2.8445715356822605e-06, "loss": 0.7536, "step": 8466 }, { "epoch": 0.652713536848597, "grad_norm": 3.904291868209839, "learning_rate": 2.8434451152101273e-06, "loss": 0.9559, "step": 8467 }, { "epoch": 0.6527906259636139, "grad_norm": 3.66575288772583, "learning_rate": 2.8423188291880243e-06, "loss": 1.0307, "step": 8468 }, { "epoch": 0.652867715078631, "grad_norm": 3.674485206604004, "learning_rate": 2.8411926776861653e-06, "loss": 0.9073, "step": 8469 }, { "epoch": 0.6529448041936479, "grad_norm": 3.7279632091522217, "learning_rate": 2.840066660774762e-06, "loss": 0.9714, "step": 8470 }, { "epoch": 0.6530218933086648, "grad_norm": 3.9103338718414307, "learning_rate": 2.838940778524015e-06, "loss": 0.8716, "step": 8471 }, { "epoch": 0.6530989824236818, "grad_norm": 3.741309404373169, "learning_rate": 2.8378150310041197e-06, "loss": 0.8169, "step": 8472 }, { "epoch": 0.6531760715386987, "grad_norm": 3.7075369358062744, "learning_rate": 2.83668941828526e-06, "loss": 0.9703, "step": 8473 }, { "epoch": 0.6532531606537157, "grad_norm": 4.015072822570801, "learning_rate": 2.835563940437609e-06, "loss": 0.975, "step": 8474 }, { "epoch": 0.6533302497687327, "grad_norm": 4.320268630981445, "learning_rate": 2.8344385975313366e-06, "loss": 0.9171, "step": 8475 }, { "epoch": 0.6534073388837496, "grad_norm": 3.988584518432617, "learning_rate": 2.8333133896365993e-06, "loss": 0.9586, "step": 8476 }, { "epoch": 
0.6534844279987666, "grad_norm": 3.7726614475250244, "learning_rate": 2.8321883168235555e-06, "loss": 0.8503, "step": 8477 }, { "epoch": 0.6535615171137835, "grad_norm": 3.7435741424560547, "learning_rate": 2.8310633791623403e-06, "loss": 0.9676, "step": 8478 }, { "epoch": 0.6536386062288005, "grad_norm": 3.893402099609375, "learning_rate": 2.8299385767230904e-06, "loss": 0.8198, "step": 8479 }, { "epoch": 0.6537156953438175, "grad_norm": 3.577685594558716, "learning_rate": 2.8288139095759315e-06, "loss": 0.9281, "step": 8480 }, { "epoch": 0.6537927844588344, "grad_norm": 3.464526891708374, "learning_rate": 2.8276893777909796e-06, "loss": 0.8223, "step": 8481 }, { "epoch": 0.6538698735738514, "grad_norm": 3.8149683475494385, "learning_rate": 2.826564981438345e-06, "loss": 0.9708, "step": 8482 }, { "epoch": 0.6539469626888683, "grad_norm": 3.6663248538970947, "learning_rate": 2.825440720588129e-06, "loss": 0.8898, "step": 8483 }, { "epoch": 0.6540240518038853, "grad_norm": 3.7101478576660156, "learning_rate": 2.82431659531042e-06, "loss": 0.9334, "step": 8484 }, { "epoch": 0.6541011409189023, "grad_norm": 3.7153666019439697, "learning_rate": 2.823192605675303e-06, "loss": 0.8837, "step": 8485 }, { "epoch": 0.6541782300339192, "grad_norm": 3.713289260864258, "learning_rate": 2.822068751752853e-06, "loss": 0.8889, "step": 8486 }, { "epoch": 0.6542553191489362, "grad_norm": 3.4118247032165527, "learning_rate": 2.820945033613136e-06, "loss": 0.9104, "step": 8487 }, { "epoch": 0.6543324082639531, "grad_norm": 3.6996970176696777, "learning_rate": 2.8198214513262132e-06, "loss": 0.8539, "step": 8488 }, { "epoch": 0.6544094973789701, "grad_norm": 3.8898186683654785, "learning_rate": 2.8186980049621292e-06, "loss": 0.9048, "step": 8489 }, { "epoch": 0.654486586493987, "grad_norm": 3.663559675216675, "learning_rate": 2.8175746945909277e-06, "loss": 0.798, "step": 8490 }, { "epoch": 0.654563675609004, "grad_norm": 4.062438488006592, "learning_rate": 2.816451520282641e-06, "loss": 0.8531, "step": 8491 }, { "epoch": 0.654640764724021, "grad_norm": 3.281128168106079, "learning_rate": 2.8153284821072925e-06, "loss": 0.8421, "step": 8492 }, { "epoch": 0.6547178538390379, "grad_norm": 3.5460658073425293, "learning_rate": 2.814205580134899e-06, "loss": 0.9501, "step": 8493 }, { "epoch": 0.6547949429540549, "grad_norm": 3.5473179817199707, "learning_rate": 2.813082814435469e-06, "loss": 0.8635, "step": 8494 }, { "epoch": 0.6548720320690719, "grad_norm": 3.5406644344329834, "learning_rate": 2.8119601850789966e-06, "loss": 0.8653, "step": 8495 }, { "epoch": 0.6549491211840888, "grad_norm": 4.069020748138428, "learning_rate": 2.810837692135475e-06, "loss": 0.9973, "step": 8496 }, { "epoch": 0.6550262102991058, "grad_norm": 3.7306931018829346, "learning_rate": 2.809715335674885e-06, "loss": 1.004, "step": 8497 }, { "epoch": 0.6551032994141227, "grad_norm": 4.0841288566589355, "learning_rate": 2.8085931157672007e-06, "loss": 0.9853, "step": 8498 }, { "epoch": 0.6551803885291397, "grad_norm": 3.389622211456299, "learning_rate": 2.807471032482387e-06, "loss": 0.9221, "step": 8499 }, { "epoch": 0.6552574776441566, "grad_norm": 3.7449147701263428, "learning_rate": 2.8063490858903987e-06, "loss": 0.9093, "step": 8500 }, { "epoch": 0.6553345667591736, "grad_norm": 3.7935571670532227, "learning_rate": 2.805227276061182e-06, "loss": 0.9671, "step": 8501 }, { "epoch": 0.6554116558741906, "grad_norm": 3.392277717590332, "learning_rate": 2.8041056030646787e-06, "loss": 0.9458, "step": 8502 }, { "epoch": 0.6554887449892075, 
"grad_norm": 4.1073150634765625, "learning_rate": 2.8029840669708176e-06, "loss": 0.9082, "step": 8503 }, { "epoch": 0.6555658341042245, "grad_norm": 3.53625750541687, "learning_rate": 2.8018626678495215e-06, "loss": 0.9127, "step": 8504 }, { "epoch": 0.6556429232192414, "grad_norm": 3.6561434268951416, "learning_rate": 2.8007414057707054e-06, "loss": 0.9273, "step": 8505 }, { "epoch": 0.6557200123342584, "grad_norm": 3.346360683441162, "learning_rate": 2.7996202808042706e-06, "loss": 0.8784, "step": 8506 }, { "epoch": 0.6557971014492754, "grad_norm": 3.969512462615967, "learning_rate": 2.7984992930201156e-06, "loss": 0.8833, "step": 8507 }, { "epoch": 0.6558741905642923, "grad_norm": 3.466351270675659, "learning_rate": 2.7973784424881273e-06, "loss": 0.8353, "step": 8508 }, { "epoch": 0.6559512796793093, "grad_norm": 3.9077634811401367, "learning_rate": 2.7962577292781856e-06, "loss": 0.9578, "step": 8509 }, { "epoch": 0.6560283687943262, "grad_norm": 4.4450225830078125, "learning_rate": 2.795137153460163e-06, "loss": 1.0021, "step": 8510 }, { "epoch": 0.6561054579093432, "grad_norm": 3.7983291149139404, "learning_rate": 2.794016715103918e-06, "loss": 0.9893, "step": 8511 }, { "epoch": 0.6561825470243602, "grad_norm": 3.427762985229492, "learning_rate": 2.7928964142793048e-06, "loss": 0.8019, "step": 8512 }, { "epoch": 0.6562596361393771, "grad_norm": 4.073552131652832, "learning_rate": 2.7917762510561698e-06, "loss": 0.9104, "step": 8513 }, { "epoch": 0.6563367252543941, "grad_norm": 3.4051129817962646, "learning_rate": 2.790656225504349e-06, "loss": 0.8763, "step": 8514 }, { "epoch": 0.656413814369411, "grad_norm": 3.3330910205841064, "learning_rate": 2.789536337693672e-06, "loss": 0.8136, "step": 8515 }, { "epoch": 0.656490903484428, "grad_norm": 3.8815250396728516, "learning_rate": 2.7884165876939537e-06, "loss": 1.0265, "step": 8516 }, { "epoch": 0.656567992599445, "grad_norm": 3.9009790420532227, "learning_rate": 2.787296975575008e-06, "loss": 0.9191, "step": 8517 }, { "epoch": 0.6566450817144619, "grad_norm": 3.823681354522705, "learning_rate": 2.786177501406635e-06, "loss": 1.0026, "step": 8518 }, { "epoch": 0.6567221708294789, "grad_norm": 3.889641761779785, "learning_rate": 2.7850581652586296e-06, "loss": 0.8824, "step": 8519 }, { "epoch": 0.6567992599444958, "grad_norm": 3.67506742477417, "learning_rate": 2.7839389672007753e-06, "loss": 0.9501, "step": 8520 }, { "epoch": 0.6568763490595128, "grad_norm": 4.033994197845459, "learning_rate": 2.7828199073028504e-06, "loss": 0.9688, "step": 8521 }, { "epoch": 0.6569534381745298, "grad_norm": 3.685767889022827, "learning_rate": 2.78170098563462e-06, "loss": 1.0076, "step": 8522 }, { "epoch": 0.6570305272895467, "grad_norm": 3.811617374420166, "learning_rate": 2.7805822022658452e-06, "loss": 0.8409, "step": 8523 }, { "epoch": 0.6571076164045637, "grad_norm": 3.2844462394714355, "learning_rate": 2.7794635572662755e-06, "loss": 0.8843, "step": 8524 }, { "epoch": 0.6571847055195806, "grad_norm": 3.5882797241210938, "learning_rate": 2.778345050705654e-06, "loss": 0.8763, "step": 8525 }, { "epoch": 0.6572617946345976, "grad_norm": 4.07551383972168, "learning_rate": 2.7772266826537103e-06, "loss": 0.9604, "step": 8526 }, { "epoch": 0.6573388837496146, "grad_norm": 3.5869812965393066, "learning_rate": 2.7761084531801707e-06, "loss": 1.0136, "step": 8527 }, { "epoch": 0.6574159728646315, "grad_norm": 3.7623794078826904, "learning_rate": 2.774990362354752e-06, "loss": 0.9086, "step": 8528 }, { "epoch": 0.6574930619796485, "grad_norm": 
3.60953950881958, "learning_rate": 2.7738724102471603e-06, "loss": 0.9353, "step": 8529 }, { "epoch": 0.6575701510946654, "grad_norm": 3.6601200103759766, "learning_rate": 2.772754596927094e-06, "loss": 0.9573, "step": 8530 }, { "epoch": 0.6576472402096823, "grad_norm": 3.794666051864624, "learning_rate": 2.771636922464246e-06, "loss": 0.8832, "step": 8531 }, { "epoch": 0.6577243293246994, "grad_norm": 3.9697749614715576, "learning_rate": 2.7705193869282924e-06, "loss": 1.0734, "step": 8532 }, { "epoch": 0.6578014184397163, "grad_norm": 3.8079090118408203, "learning_rate": 2.7694019903889087e-06, "loss": 0.969, "step": 8533 }, { "epoch": 0.6578785075547333, "grad_norm": 4.480514049530029, "learning_rate": 2.768284732915758e-06, "loss": 1.0543, "step": 8534 }, { "epoch": 0.6579555966697502, "grad_norm": 3.4868109226226807, "learning_rate": 2.767167614578496e-06, "loss": 0.9419, "step": 8535 }, { "epoch": 0.6580326857847671, "grad_norm": 3.8815977573394775, "learning_rate": 2.7660506354467708e-06, "loss": 0.9569, "step": 8536 }, { "epoch": 0.6581097748997842, "grad_norm": 3.8802473545074463, "learning_rate": 2.7649337955902167e-06, "loss": 0.9196, "step": 8537 }, { "epoch": 0.6581868640148011, "grad_norm": 3.837989330291748, "learning_rate": 2.763817095078465e-06, "loss": 0.9033, "step": 8538 }, { "epoch": 0.6582639531298181, "grad_norm": 3.4889914989471436, "learning_rate": 2.762700533981136e-06, "loss": 0.8692, "step": 8539 }, { "epoch": 0.658341042244835, "grad_norm": 3.921881914138794, "learning_rate": 2.761584112367841e-06, "loss": 0.9108, "step": 8540 }, { "epoch": 0.6584181313598519, "grad_norm": 3.6214163303375244, "learning_rate": 2.7604678303081833e-06, "loss": 0.9357, "step": 8541 }, { "epoch": 0.658495220474869, "grad_norm": 3.711642265319824, "learning_rate": 2.7593516878717604e-06, "loss": 0.8258, "step": 8542 }, { "epoch": 0.6585723095898859, "grad_norm": 3.5625925064086914, "learning_rate": 2.758235685128152e-06, "loss": 0.9205, "step": 8543 }, { "epoch": 0.6586493987049029, "grad_norm": 3.9220776557922363, "learning_rate": 2.75711982214694e-06, "loss": 0.9736, "step": 8544 }, { "epoch": 0.6587264878199198, "grad_norm": 3.7191176414489746, "learning_rate": 2.7560040989976894e-06, "loss": 0.8053, "step": 8545 }, { "epoch": 0.6588035769349367, "grad_norm": 3.6026618480682373, "learning_rate": 2.7548885157499617e-06, "loss": 0.9155, "step": 8546 }, { "epoch": 0.6588806660499538, "grad_norm": 3.873753786087036, "learning_rate": 2.7537730724733092e-06, "loss": 0.9101, "step": 8547 }, { "epoch": 0.6589577551649707, "grad_norm": 3.89041805267334, "learning_rate": 2.7526577692372704e-06, "loss": 0.783, "step": 8548 }, { "epoch": 0.6590348442799877, "grad_norm": 3.7555131912231445, "learning_rate": 2.75154260611138e-06, "loss": 1.0063, "step": 8549 }, { "epoch": 0.6591119333950046, "grad_norm": 3.4990248680114746, "learning_rate": 2.750427583165164e-06, "loss": 0.9204, "step": 8550 }, { "epoch": 0.6591890225100215, "grad_norm": 3.4794487953186035, "learning_rate": 2.7493127004681365e-06, "loss": 0.9723, "step": 8551 }, { "epoch": 0.6592661116250386, "grad_norm": 3.5538179874420166, "learning_rate": 2.7481979580898056e-06, "loss": 0.8747, "step": 8552 }, { "epoch": 0.6593432007400555, "grad_norm": 3.8357620239257812, "learning_rate": 2.7470833560996717e-06, "loss": 1.0114, "step": 8553 }, { "epoch": 0.6594202898550725, "grad_norm": 3.8175249099731445, "learning_rate": 2.7459688945672204e-06, "loss": 0.9458, "step": 8554 }, { "epoch": 0.6594973789700894, "grad_norm": 3.8220913410186768, 
"learning_rate": 2.744854573561935e-06, "loss": 0.9557, "step": 8555 }, { "epoch": 0.6595744680851063, "grad_norm": 4.2890944480896, "learning_rate": 2.7437403931532867e-06, "loss": 1.043, "step": 8556 }, { "epoch": 0.6596515572001234, "grad_norm": 4.034401893615723, "learning_rate": 2.7426263534107394e-06, "loss": 0.9287, "step": 8557 }, { "epoch": 0.6597286463151403, "grad_norm": 3.621591091156006, "learning_rate": 2.7415124544037497e-06, "loss": 0.968, "step": 8558 }, { "epoch": 0.6598057354301573, "grad_norm": 3.7988715171813965, "learning_rate": 2.74039869620176e-06, "loss": 0.9679, "step": 8559 }, { "epoch": 0.6598828245451742, "grad_norm": 3.6728246212005615, "learning_rate": 2.7392850788742088e-06, "loss": 0.8546, "step": 8560 }, { "epoch": 0.6599599136601911, "grad_norm": 3.391083002090454, "learning_rate": 2.7381716024905236e-06, "loss": 0.8998, "step": 8561 }, { "epoch": 0.6600370027752082, "grad_norm": 3.6413748264312744, "learning_rate": 2.7370582671201253e-06, "loss": 1.021, "step": 8562 }, { "epoch": 0.6601140918902251, "grad_norm": 3.9035537242889404, "learning_rate": 2.7359450728324254e-06, "loss": 0.9325, "step": 8563 }, { "epoch": 0.6601911810052421, "grad_norm": 3.603443145751953, "learning_rate": 2.734832019696821e-06, "loss": 0.943, "step": 8564 }, { "epoch": 0.660268270120259, "grad_norm": 4.327446937561035, "learning_rate": 2.7337191077827107e-06, "loss": 0.94, "step": 8565 }, { "epoch": 0.6603453592352759, "grad_norm": 3.7741518020629883, "learning_rate": 2.732606337159476e-06, "loss": 0.8859, "step": 8566 }, { "epoch": 0.660422448350293, "grad_norm": 3.900160074234009, "learning_rate": 2.731493707896493e-06, "loss": 0.8494, "step": 8567 }, { "epoch": 0.6604995374653099, "grad_norm": 4.106858253479004, "learning_rate": 2.730381220063131e-06, "loss": 0.9732, "step": 8568 }, { "epoch": 0.6605766265803269, "grad_norm": 3.8731374740600586, "learning_rate": 2.729268873728743e-06, "loss": 0.9365, "step": 8569 }, { "epoch": 0.6606537156953438, "grad_norm": 4.0173659324646, "learning_rate": 2.7281566689626804e-06, "loss": 0.9892, "step": 8570 }, { "epoch": 0.6607308048103607, "grad_norm": 3.8889834880828857, "learning_rate": 2.7270446058342836e-06, "loss": 0.9164, "step": 8571 }, { "epoch": 0.6608078939253778, "grad_norm": 4.212670803070068, "learning_rate": 2.7259326844128832e-06, "loss": 1.0294, "step": 8572 }, { "epoch": 0.6608849830403947, "grad_norm": 4.06730318069458, "learning_rate": 2.724820904767804e-06, "loss": 0.9905, "step": 8573 }, { "epoch": 0.6609620721554117, "grad_norm": 3.9518141746520996, "learning_rate": 2.7237092669683562e-06, "loss": 0.9677, "step": 8574 }, { "epoch": 0.6610391612704286, "grad_norm": 3.7986531257629395, "learning_rate": 2.722597771083847e-06, "loss": 0.8483, "step": 8575 }, { "epoch": 0.6611162503854455, "grad_norm": 4.189377784729004, "learning_rate": 2.721486417183571e-06, "loss": 1.0454, "step": 8576 }, { "epoch": 0.6611933395004626, "grad_norm": 3.7273128032684326, "learning_rate": 2.7203752053368167e-06, "loss": 0.928, "step": 8577 }, { "epoch": 0.6612704286154795, "grad_norm": 3.6438515186309814, "learning_rate": 2.7192641356128613e-06, "loss": 0.9048, "step": 8578 }, { "epoch": 0.6613475177304965, "grad_norm": 3.6332359313964844, "learning_rate": 2.718153208080977e-06, "loss": 0.9138, "step": 8579 }, { "epoch": 0.6614246068455134, "grad_norm": 3.474804162979126, "learning_rate": 2.7170424228104207e-06, "loss": 0.8285, "step": 8580 }, { "epoch": 0.6615016959605303, "grad_norm": 3.8621301651000977, "learning_rate": 
2.7159317798704452e-06, "loss": 0.9538, "step": 8581 }, { "epoch": 0.6615787850755473, "grad_norm": 4.121014595031738, "learning_rate": 2.7148212793302938e-06, "loss": 0.9663, "step": 8582 }, { "epoch": 0.6616558741905643, "grad_norm": 3.514923572540283, "learning_rate": 2.7137109212592006e-06, "loss": 0.8936, "step": 8583 }, { "epoch": 0.6617329633055813, "grad_norm": 3.3887276649475098, "learning_rate": 2.712600705726392e-06, "loss": 0.9403, "step": 8584 }, { "epoch": 0.6618100524205982, "grad_norm": 3.4407477378845215, "learning_rate": 2.7114906328010803e-06, "loss": 0.9276, "step": 8585 }, { "epoch": 0.6618871415356151, "grad_norm": 3.4507927894592285, "learning_rate": 2.7103807025524764e-06, "loss": 0.7437, "step": 8586 }, { "epoch": 0.6619642306506321, "grad_norm": 3.491410255432129, "learning_rate": 2.7092709150497764e-06, "loss": 0.8576, "step": 8587 }, { "epoch": 0.6620413197656491, "grad_norm": 3.461198091506958, "learning_rate": 2.708161270362171e-06, "loss": 0.8797, "step": 8588 }, { "epoch": 0.6621184088806661, "grad_norm": 3.8753464221954346, "learning_rate": 2.7070517685588417e-06, "loss": 0.8896, "step": 8589 }, { "epoch": 0.662195497995683, "grad_norm": 3.9158358573913574, "learning_rate": 2.70594240970896e-06, "loss": 0.8949, "step": 8590 }, { "epoch": 0.6622725871106999, "grad_norm": 12.948868751525879, "learning_rate": 2.7048331938816863e-06, "loss": 0.8589, "step": 8591 }, { "epoch": 0.6623496762257169, "grad_norm": 3.7209019660949707, "learning_rate": 2.7037241211461752e-06, "loss": 0.9651, "step": 8592 }, { "epoch": 0.6624267653407339, "grad_norm": 3.9368491172790527, "learning_rate": 2.7026151915715735e-06, "loss": 0.9401, "step": 8593 }, { "epoch": 0.6625038544557509, "grad_norm": 3.6502487659454346, "learning_rate": 2.7015064052270156e-06, "loss": 0.9531, "step": 8594 }, { "epoch": 0.6625809435707678, "grad_norm": 3.4706952571868896, "learning_rate": 2.700397762181631e-06, "loss": 0.8791, "step": 8595 }, { "epoch": 0.6626580326857847, "grad_norm": 3.8239970207214355, "learning_rate": 2.6992892625045343e-06, "loss": 1.0303, "step": 8596 }, { "epoch": 0.6627351218008017, "grad_norm": 3.561873435974121, "learning_rate": 2.698180906264837e-06, "loss": 0.87, "step": 8597 }, { "epoch": 0.6628122109158187, "grad_norm": 3.5032331943511963, "learning_rate": 2.697072693531637e-06, "loss": 0.9174, "step": 8598 }, { "epoch": 0.6628893000308357, "grad_norm": 3.578794479370117, "learning_rate": 2.695964624374029e-06, "loss": 0.926, "step": 8599 }, { "epoch": 0.6629663891458526, "grad_norm": 3.583988904953003, "learning_rate": 2.6948566988610938e-06, "loss": 0.8603, "step": 8600 }, { "epoch": 0.6630434782608695, "grad_norm": 3.555171489715576, "learning_rate": 2.693748917061906e-06, "loss": 0.8882, "step": 8601 }, { "epoch": 0.6631205673758865, "grad_norm": 3.73492431640625, "learning_rate": 2.692641279045527e-06, "loss": 0.961, "step": 8602 }, { "epoch": 0.6631976564909035, "grad_norm": 3.6243600845336914, "learning_rate": 2.691533784881014e-06, "loss": 0.8199, "step": 8603 }, { "epoch": 0.6632747456059205, "grad_norm": 3.813415288925171, "learning_rate": 2.6904264346374135e-06, "loss": 0.8965, "step": 8604 }, { "epoch": 0.6633518347209374, "grad_norm": 3.7679946422576904, "learning_rate": 2.6893192283837636e-06, "loss": 0.9742, "step": 8605 }, { "epoch": 0.6634289238359543, "grad_norm": 3.7962825298309326, "learning_rate": 2.688212166189095e-06, "loss": 0.9346, "step": 8606 }, { "epoch": 0.6635060129509713, "grad_norm": 3.616288900375366, "learning_rate": 
2.6871052481224217e-06, "loss": 0.9487, "step": 8607 }, { "epoch": 0.6635831020659883, "grad_norm": 3.7229197025299072, "learning_rate": 2.685998474252758e-06, "loss": 0.8765, "step": 8608 }, { "epoch": 0.6636601911810053, "grad_norm": 3.9187211990356445, "learning_rate": 2.684891844649103e-06, "loss": 0.9206, "step": 8609 }, { "epoch": 0.6637372802960222, "grad_norm": 3.438185453414917, "learning_rate": 2.6837853593804554e-06, "loss": 0.871, "step": 8610 }, { "epoch": 0.6638143694110391, "grad_norm": 3.9521586894989014, "learning_rate": 2.682679018515792e-06, "loss": 0.9515, "step": 8611 }, { "epoch": 0.6638914585260561, "grad_norm": 3.899041175842285, "learning_rate": 2.68157282212409e-06, "loss": 0.9129, "step": 8612 }, { "epoch": 0.663968547641073, "grad_norm": 3.531867265701294, "learning_rate": 2.680466770274316e-06, "loss": 0.8427, "step": 8613 }, { "epoch": 0.6640456367560901, "grad_norm": 3.4896748065948486, "learning_rate": 2.679360863035425e-06, "loss": 0.7873, "step": 8614 }, { "epoch": 0.664122725871107, "grad_norm": 3.5182361602783203, "learning_rate": 2.678255100476366e-06, "loss": 0.8737, "step": 8615 }, { "epoch": 0.6641998149861239, "grad_norm": 3.8621270656585693, "learning_rate": 2.6771494826660782e-06, "loss": 0.9002, "step": 8616 }, { "epoch": 0.6642769041011409, "grad_norm": 3.783743143081665, "learning_rate": 2.6760440096734875e-06, "loss": 0.9574, "step": 8617 }, { "epoch": 0.6643539932161578, "grad_norm": 3.6670119762420654, "learning_rate": 2.674938681567517e-06, "loss": 0.824, "step": 8618 }, { "epoch": 0.6644310823311749, "grad_norm": 3.6787564754486084, "learning_rate": 2.6738334984170785e-06, "loss": 0.9541, "step": 8619 }, { "epoch": 0.6645081714461918, "grad_norm": 3.8661928176879883, "learning_rate": 2.672728460291073e-06, "loss": 0.9713, "step": 8620 }, { "epoch": 0.6645852605612088, "grad_norm": 5.067914009094238, "learning_rate": 2.671623567258398e-06, "loss": 1.0446, "step": 8621 }, { "epoch": 0.6646623496762257, "grad_norm": 3.7516796588897705, "learning_rate": 2.6705188193879316e-06, "loss": 0.8746, "step": 8622 }, { "epoch": 0.6647394387912426, "grad_norm": 3.7899248600006104, "learning_rate": 2.669414216748552e-06, "loss": 0.9415, "step": 8623 }, { "epoch": 0.6648165279062597, "grad_norm": 3.813930034637451, "learning_rate": 2.6683097594091257e-06, "loss": 1.0627, "step": 8624 }, { "epoch": 0.6648936170212766, "grad_norm": 4.484930992126465, "learning_rate": 2.6672054474385102e-06, "loss": 1.0192, "step": 8625 }, { "epoch": 0.6649707061362936, "grad_norm": 3.5336852073669434, "learning_rate": 2.666101280905553e-06, "loss": 0.8844, "step": 8626 }, { "epoch": 0.6650477952513105, "grad_norm": 3.831902503967285, "learning_rate": 2.664997259879095e-06, "loss": 0.8492, "step": 8627 }, { "epoch": 0.6651248843663274, "grad_norm": 3.4217164516448975, "learning_rate": 2.663893384427963e-06, "loss": 0.9379, "step": 8628 }, { "epoch": 0.6652019734813445, "grad_norm": 3.7807199954986572, "learning_rate": 2.66278965462098e-06, "loss": 0.9947, "step": 8629 }, { "epoch": 0.6652790625963614, "grad_norm": 4.046280384063721, "learning_rate": 2.661686070526956e-06, "loss": 0.9283, "step": 8630 }, { "epoch": 0.6653561517113784, "grad_norm": 3.804257392883301, "learning_rate": 2.6605826322146954e-06, "loss": 1.0612, "step": 8631 }, { "epoch": 0.6654332408263953, "grad_norm": 3.5956528186798096, "learning_rate": 2.659479339752994e-06, "loss": 0.8869, "step": 8632 }, { "epoch": 0.6655103299414122, "grad_norm": 3.916138172149658, "learning_rate": 2.6583761932106323e-06, 
"loss": 0.8914, "step": 8633 }, { "epoch": 0.6655874190564293, "grad_norm": 4.105411052703857, "learning_rate": 2.6572731926563867e-06, "loss": 0.961, "step": 8634 }, { "epoch": 0.6656645081714462, "grad_norm": 3.324625015258789, "learning_rate": 2.6561703381590244e-06, "loss": 0.8609, "step": 8635 }, { "epoch": 0.6657415972864632, "grad_norm": 3.7240700721740723, "learning_rate": 2.6550676297873023e-06, "loss": 0.8107, "step": 8636 }, { "epoch": 0.6658186864014801, "grad_norm": 3.6989731788635254, "learning_rate": 2.6539650676099687e-06, "loss": 0.8776, "step": 8637 }, { "epoch": 0.665895775516497, "grad_norm": 3.833456039428711, "learning_rate": 2.652862651695765e-06, "loss": 0.9177, "step": 8638 }, { "epoch": 0.6659728646315141, "grad_norm": 3.728043794631958, "learning_rate": 2.651760382113417e-06, "loss": 0.9688, "step": 8639 }, { "epoch": 0.666049953746531, "grad_norm": 3.813891649246216, "learning_rate": 2.6506582589316463e-06, "loss": 0.9131, "step": 8640 }, { "epoch": 0.666127042861548, "grad_norm": 3.5366103649139404, "learning_rate": 2.649556282219167e-06, "loss": 0.9076, "step": 8641 }, { "epoch": 0.6662041319765649, "grad_norm": 3.920154333114624, "learning_rate": 2.64845445204468e-06, "loss": 0.9324, "step": 8642 }, { "epoch": 0.6662812210915818, "grad_norm": 4.236634731292725, "learning_rate": 2.64735276847688e-06, "loss": 0.8984, "step": 8643 }, { "epoch": 0.6663583102065989, "grad_norm": 3.766432762145996, "learning_rate": 2.646251231584449e-06, "loss": 0.9886, "step": 8644 }, { "epoch": 0.6664353993216158, "grad_norm": 3.6179211139678955, "learning_rate": 2.6451498414360633e-06, "loss": 0.7535, "step": 8645 }, { "epoch": 0.6665124884366328, "grad_norm": 3.5795180797576904, "learning_rate": 2.644048598100388e-06, "loss": 0.8096, "step": 8646 }, { "epoch": 0.6665895775516497, "grad_norm": 3.80401873588562, "learning_rate": 2.642947501646082e-06, "loss": 0.9594, "step": 8647 }, { "epoch": 0.6666666666666666, "grad_norm": 3.3994295597076416, "learning_rate": 2.6418465521417914e-06, "loss": 0.7756, "step": 8648 }, { "epoch": 0.6667437557816837, "grad_norm": 3.6575098037719727, "learning_rate": 2.6407457496561573e-06, "loss": 0.9767, "step": 8649 }, { "epoch": 0.6668208448967006, "grad_norm": 3.2171177864074707, "learning_rate": 2.6396450942578046e-06, "loss": 0.8447, "step": 8650 }, { "epoch": 0.6668979340117176, "grad_norm": 3.508512496948242, "learning_rate": 2.638544586015356e-06, "loss": 0.9398, "step": 8651 }, { "epoch": 0.6669750231267345, "grad_norm": 3.8532354831695557, "learning_rate": 2.6374442249974214e-06, "loss": 0.988, "step": 8652 }, { "epoch": 0.6670521122417514, "grad_norm": 4.1069416999816895, "learning_rate": 2.6363440112726037e-06, "loss": 0.9103, "step": 8653 }, { "epoch": 0.6671292013567685, "grad_norm": 3.5245847702026367, "learning_rate": 2.6352439449094952e-06, "loss": 0.9636, "step": 8654 }, { "epoch": 0.6672062904717854, "grad_norm": 3.4307544231414795, "learning_rate": 2.6341440259766792e-06, "loss": 0.8636, "step": 8655 }, { "epoch": 0.6672833795868024, "grad_norm": 3.6783254146575928, "learning_rate": 2.6330442545427303e-06, "loss": 0.8899, "step": 8656 }, { "epoch": 0.6673604687018193, "grad_norm": 3.7773871421813965, "learning_rate": 2.6319446306762136e-06, "loss": 0.9547, "step": 8657 }, { "epoch": 0.6674375578168362, "grad_norm": 3.804189920425415, "learning_rate": 2.6308451544456863e-06, "loss": 0.9195, "step": 8658 }, { "epoch": 0.6675146469318533, "grad_norm": 3.798102855682373, "learning_rate": 2.6297458259196915e-06, "loss": 0.9327, 
"step": 8659 }, { "epoch": 0.6675917360468702, "grad_norm": 4.101744174957275, "learning_rate": 2.628646645166769e-06, "loss": 1.0034, "step": 8660 }, { "epoch": 0.6676688251618872, "grad_norm": 3.816171884536743, "learning_rate": 2.6275476122554467e-06, "loss": 1.0159, "step": 8661 }, { "epoch": 0.6677459142769041, "grad_norm": 3.46830677986145, "learning_rate": 2.6264487272542427e-06, "loss": 0.9187, "step": 8662 }, { "epoch": 0.667823003391921, "grad_norm": 3.302978515625, "learning_rate": 2.625349990231668e-06, "loss": 0.942, "step": 8663 }, { "epoch": 0.667900092506938, "grad_norm": 3.929262399673462, "learning_rate": 2.624251401256225e-06, "loss": 0.986, "step": 8664 }, { "epoch": 0.667977181621955, "grad_norm": 4.437450408935547, "learning_rate": 2.623152960396401e-06, "loss": 0.8384, "step": 8665 }, { "epoch": 0.668054270736972, "grad_norm": 3.7824137210845947, "learning_rate": 2.622054667720679e-06, "loss": 0.9588, "step": 8666 }, { "epoch": 0.6681313598519889, "grad_norm": 3.94724702835083, "learning_rate": 2.6209565232975342e-06, "loss": 0.875, "step": 8667 }, { "epoch": 0.6682084489670058, "grad_norm": 3.6828274726867676, "learning_rate": 2.619858527195428e-06, "loss": 0.914, "step": 8668 }, { "epoch": 0.6682855380820228, "grad_norm": 3.615562677383423, "learning_rate": 2.618760679482817e-06, "loss": 0.9406, "step": 8669 }, { "epoch": 0.6683626271970398, "grad_norm": 3.8135929107666016, "learning_rate": 2.617662980228144e-06, "loss": 0.8714, "step": 8670 }, { "epoch": 0.6684397163120568, "grad_norm": 3.3155691623687744, "learning_rate": 2.6165654294998442e-06, "loss": 0.7839, "step": 8671 }, { "epoch": 0.6685168054270737, "grad_norm": 3.6126980781555176, "learning_rate": 2.6154680273663468e-06, "loss": 0.8381, "step": 8672 }, { "epoch": 0.6685938945420906, "grad_norm": 4.058097839355469, "learning_rate": 2.614370773896069e-06, "loss": 1.006, "step": 8673 }, { "epoch": 0.6686709836571076, "grad_norm": 3.903541088104248, "learning_rate": 2.613273669157417e-06, "loss": 0.8644, "step": 8674 }, { "epoch": 0.6687480727721246, "grad_norm": 3.7401607036590576, "learning_rate": 2.612176713218792e-06, "loss": 0.9988, "step": 8675 }, { "epoch": 0.6688251618871416, "grad_norm": 3.917459011077881, "learning_rate": 2.61107990614858e-06, "loss": 0.9585, "step": 8676 }, { "epoch": 0.6689022510021585, "grad_norm": 4.191850185394287, "learning_rate": 2.6099832480151642e-06, "loss": 0.931, "step": 8677 }, { "epoch": 0.6689793401171754, "grad_norm": 3.9755399227142334, "learning_rate": 2.6088867388869143e-06, "loss": 1.0736, "step": 8678 }, { "epoch": 0.6690564292321924, "grad_norm": 3.440880298614502, "learning_rate": 2.6077903788321924e-06, "loss": 1.0059, "step": 8679 }, { "epoch": 0.6691335183472094, "grad_norm": 3.5236377716064453, "learning_rate": 2.606694167919353e-06, "loss": 0.9046, "step": 8680 }, { "epoch": 0.6692106074622264, "grad_norm": 3.7371933460235596, "learning_rate": 2.6055981062167348e-06, "loss": 0.9506, "step": 8681 }, { "epoch": 0.6692876965772433, "grad_norm": 3.5741524696350098, "learning_rate": 2.6045021937926738e-06, "loss": 0.8337, "step": 8682 }, { "epoch": 0.6693647856922602, "grad_norm": 3.534313678741455, "learning_rate": 2.6034064307154944e-06, "loss": 0.8608, "step": 8683 }, { "epoch": 0.6694418748072772, "grad_norm": 4.026943206787109, "learning_rate": 2.6023108170535115e-06, "loss": 0.909, "step": 8684 }, { "epoch": 0.6695189639222942, "grad_norm": 3.7044715881347656, "learning_rate": 2.6012153528750316e-06, "loss": 0.9678, "step": 8685 }, { "epoch": 
0.6695960530373112, "grad_norm": 3.3647007942199707, "learning_rate": 2.6001200382483525e-06, "loss": 0.7775, "step": 8686 }, { "epoch": 0.6696731421523281, "grad_norm": 3.695740222930908, "learning_rate": 2.599024873241758e-06, "loss": 0.9464, "step": 8687 }, { "epoch": 0.669750231267345, "grad_norm": 3.711057662963867, "learning_rate": 2.5979298579235276e-06, "loss": 0.9555, "step": 8688 }, { "epoch": 0.669827320382362, "grad_norm": 3.6604957580566406, "learning_rate": 2.5968349923619297e-06, "loss": 0.9571, "step": 8689 }, { "epoch": 0.669904409497379, "grad_norm": 3.690217971801758, "learning_rate": 2.5957402766252238e-06, "loss": 0.8819, "step": 8690 }, { "epoch": 0.669981498612396, "grad_norm": 3.983525514602661, "learning_rate": 2.594645710781661e-06, "loss": 0.9634, "step": 8691 }, { "epoch": 0.6700585877274129, "grad_norm": 3.4786341190338135, "learning_rate": 2.59355129489948e-06, "loss": 0.9043, "step": 8692 }, { "epoch": 0.6701356768424298, "grad_norm": 3.7870969772338867, "learning_rate": 2.592457029046912e-06, "loss": 0.8991, "step": 8693 }, { "epoch": 0.6702127659574468, "grad_norm": 3.683763027191162, "learning_rate": 2.5913629132921784e-06, "loss": 0.8989, "step": 8694 }, { "epoch": 0.6702898550724637, "grad_norm": 3.569939136505127, "learning_rate": 2.5902689477034936e-06, "loss": 0.9178, "step": 8695 }, { "epoch": 0.6703669441874808, "grad_norm": 3.9141666889190674, "learning_rate": 2.589175132349059e-06, "loss": 0.9937, "step": 8696 }, { "epoch": 0.6704440333024977, "grad_norm": 3.4919772148132324, "learning_rate": 2.588081467297069e-06, "loss": 0.9049, "step": 8697 }, { "epoch": 0.6705211224175146, "grad_norm": 3.8324713706970215, "learning_rate": 2.5869879526157073e-06, "loss": 1.0839, "step": 8698 }, { "epoch": 0.6705982115325316, "grad_norm": 4.001489639282227, "learning_rate": 2.58589458837315e-06, "loss": 0.9274, "step": 8699 }, { "epoch": 0.6706753006475485, "grad_norm": 4.185301780700684, "learning_rate": 2.584801374637562e-06, "loss": 0.9855, "step": 8700 }, { "epoch": 0.6707523897625656, "grad_norm": 3.5428168773651123, "learning_rate": 2.583708311477102e-06, "loss": 0.9486, "step": 8701 }, { "epoch": 0.6708294788775825, "grad_norm": 3.9238369464874268, "learning_rate": 2.582615398959912e-06, "loss": 0.9111, "step": 8702 }, { "epoch": 0.6709065679925994, "grad_norm": 4.060305595397949, "learning_rate": 2.5815226371541325e-06, "loss": 0.9043, "step": 8703 }, { "epoch": 0.6709836571076164, "grad_norm": 3.543778896331787, "learning_rate": 2.5804300261278903e-06, "loss": 0.8829, "step": 8704 }, { "epoch": 0.6710607462226333, "grad_norm": 3.358534336090088, "learning_rate": 2.579337565949305e-06, "loss": 0.9352, "step": 8705 }, { "epoch": 0.6711378353376504, "grad_norm": 4.072521686553955, "learning_rate": 2.578245256686488e-06, "loss": 0.9195, "step": 8706 }, { "epoch": 0.6712149244526673, "grad_norm": 3.970937728881836, "learning_rate": 2.577153098407534e-06, "loss": 0.878, "step": 8707 }, { "epoch": 0.6712920135676842, "grad_norm": 3.6512842178344727, "learning_rate": 2.576061091180536e-06, "loss": 0.8582, "step": 8708 }, { "epoch": 0.6713691026827012, "grad_norm": 4.31977653503418, "learning_rate": 2.5749692350735756e-06, "loss": 0.8524, "step": 8709 }, { "epoch": 0.6714461917977181, "grad_norm": 3.4686012268066406, "learning_rate": 2.5738775301547238e-06, "loss": 0.8528, "step": 8710 }, { "epoch": 0.6715232809127352, "grad_norm": 3.668100595474243, "learning_rate": 2.572785976492043e-06, "loss": 0.8513, "step": 8711 }, { "epoch": 0.6716003700277521, 
"grad_norm": 3.792353868484497, "learning_rate": 2.5716945741535864e-06, "loss": 0.7951, "step": 8712 }, { "epoch": 0.671677459142769, "grad_norm": 3.5469858646392822, "learning_rate": 2.570603323207396e-06, "loss": 0.868, "step": 8713 }, { "epoch": 0.671754548257786, "grad_norm": 3.5623743534088135, "learning_rate": 2.5695122237215053e-06, "loss": 0.7898, "step": 8714 }, { "epoch": 0.6718316373728029, "grad_norm": 5.158936023712158, "learning_rate": 2.56842127576394e-06, "loss": 0.8926, "step": 8715 }, { "epoch": 0.67190872648782, "grad_norm": 3.4081263542175293, "learning_rate": 2.5673304794027143e-06, "loss": 0.8877, "step": 8716 }, { "epoch": 0.6719858156028369, "grad_norm": 3.6705238819122314, "learning_rate": 2.566239834705837e-06, "loss": 0.8883, "step": 8717 }, { "epoch": 0.6720629047178538, "grad_norm": 3.546715497970581, "learning_rate": 2.5651493417412986e-06, "loss": 0.7861, "step": 8718 }, { "epoch": 0.6721399938328708, "grad_norm": 3.700578212738037, "learning_rate": 2.5640590005770883e-06, "loss": 0.867, "step": 8719 }, { "epoch": 0.6722170829478877, "grad_norm": 4.1538472175598145, "learning_rate": 2.5629688112811835e-06, "loss": 0.8547, "step": 8720 }, { "epoch": 0.6722941720629048, "grad_norm": 3.4912803173065186, "learning_rate": 2.561878773921551e-06, "loss": 0.7669, "step": 8721 }, { "epoch": 0.6723712611779217, "grad_norm": 3.5746119022369385, "learning_rate": 2.56078888856615e-06, "loss": 0.87, "step": 8722 }, { "epoch": 0.6724483502929386, "grad_norm": 4.2662811279296875, "learning_rate": 2.559699155282931e-06, "loss": 0.9485, "step": 8723 }, { "epoch": 0.6725254394079556, "grad_norm": 4.090740203857422, "learning_rate": 2.558609574139829e-06, "loss": 0.8954, "step": 8724 }, { "epoch": 0.6726025285229725, "grad_norm": 3.631227970123291, "learning_rate": 2.557520145204776e-06, "loss": 0.9711, "step": 8725 }, { "epoch": 0.6726796176379896, "grad_norm": 4.057615756988525, "learning_rate": 2.5564308685456916e-06, "loss": 1.0421, "step": 8726 }, { "epoch": 0.6727567067530065, "grad_norm": 3.319061040878296, "learning_rate": 2.555341744230487e-06, "loss": 0.8149, "step": 8727 }, { "epoch": 0.6728337958680234, "grad_norm": 3.737069606781006, "learning_rate": 2.5542527723270653e-06, "loss": 0.9622, "step": 8728 }, { "epoch": 0.6729108849830404, "grad_norm": 4.169541358947754, "learning_rate": 2.553163952903315e-06, "loss": 0.9304, "step": 8729 }, { "epoch": 0.6729879740980573, "grad_norm": 3.955720901489258, "learning_rate": 2.552075286027119e-06, "loss": 0.9517, "step": 8730 }, { "epoch": 0.6730650632130744, "grad_norm": 3.720717191696167, "learning_rate": 2.5509867717663516e-06, "loss": 0.8208, "step": 8731 }, { "epoch": 0.6731421523280913, "grad_norm": 4.123862266540527, "learning_rate": 2.5498984101888747e-06, "loss": 0.8568, "step": 8732 }, { "epoch": 0.6732192414431082, "grad_norm": 4.052290439605713, "learning_rate": 2.5488102013625425e-06, "loss": 1.0482, "step": 8733 }, { "epoch": 0.6732963305581252, "grad_norm": 3.5889370441436768, "learning_rate": 2.5477221453552015e-06, "loss": 0.9129, "step": 8734 }, { "epoch": 0.6733734196731421, "grad_norm": 4.095925331115723, "learning_rate": 2.5466342422346813e-06, "loss": 0.9051, "step": 8735 }, { "epoch": 0.6734505087881592, "grad_norm": 3.5070693492889404, "learning_rate": 2.5455464920688105e-06, "loss": 0.894, "step": 8736 }, { "epoch": 0.6735275979031761, "grad_norm": 3.590304136276245, "learning_rate": 2.5444588949254035e-06, "loss": 0.9069, "step": 8737 }, { "epoch": 0.673604687018193, "grad_norm": 
3.7862966060638428, "learning_rate": 2.5433714508722674e-06, "loss": 0.9632, "step": 8738 }, { "epoch": 0.67368177613321, "grad_norm": 3.6480305194854736, "learning_rate": 2.5422841599771995e-06, "loss": 0.9864, "step": 8739 }, { "epoch": 0.6737588652482269, "grad_norm": 3.975153923034668, "learning_rate": 2.541197022307984e-06, "loss": 0.8769, "step": 8740 }, { "epoch": 0.673835954363244, "grad_norm": 3.6856942176818848, "learning_rate": 2.5401100379323994e-06, "loss": 0.9895, "step": 8741 }, { "epoch": 0.6739130434782609, "grad_norm": 3.8510794639587402, "learning_rate": 2.539023206918212e-06, "loss": 0.842, "step": 8742 }, { "epoch": 0.6739901325932778, "grad_norm": 3.5220234394073486, "learning_rate": 2.537936529333186e-06, "loss": 0.8066, "step": 8743 }, { "epoch": 0.6740672217082948, "grad_norm": 3.950100898742676, "learning_rate": 2.536850005245064e-06, "loss": 1.1013, "step": 8744 }, { "epoch": 0.6741443108233117, "grad_norm": 3.5776264667510986, "learning_rate": 2.535763634721587e-06, "loss": 0.9417, "step": 8745 }, { "epoch": 0.6742213999383287, "grad_norm": 3.58642578125, "learning_rate": 2.5346774178304847e-06, "loss": 0.8659, "step": 8746 }, { "epoch": 0.6742984890533457, "grad_norm": 4.12030029296875, "learning_rate": 2.5335913546394776e-06, "loss": 0.9124, "step": 8747 }, { "epoch": 0.6743755781683626, "grad_norm": 3.6494553089141846, "learning_rate": 2.532505445216275e-06, "loss": 0.9372, "step": 8748 }, { "epoch": 0.6744526672833796, "grad_norm": 3.9090404510498047, "learning_rate": 2.5314196896285804e-06, "loss": 0.9739, "step": 8749 }, { "epoch": 0.6745297563983965, "grad_norm": 3.881211280822754, "learning_rate": 2.530334087944082e-06, "loss": 0.935, "step": 8750 }, { "epoch": 0.6746068455134135, "grad_norm": 3.7291910648345947, "learning_rate": 2.5292486402304617e-06, "loss": 0.83, "step": 8751 }, { "epoch": 0.6746839346284305, "grad_norm": 3.3748013973236084, "learning_rate": 2.5281633465553923e-06, "loss": 0.8803, "step": 8752 }, { "epoch": 0.6747610237434474, "grad_norm": 3.430265188217163, "learning_rate": 2.527078206986537e-06, "loss": 0.9526, "step": 8753 }, { "epoch": 0.6748381128584644, "grad_norm": 3.7419652938842773, "learning_rate": 2.525993221591549e-06, "loss": 0.8539, "step": 8754 }, { "epoch": 0.6749152019734813, "grad_norm": 3.45969557762146, "learning_rate": 2.524908390438069e-06, "loss": 0.9154, "step": 8755 }, { "epoch": 0.6749922910884983, "grad_norm": 4.311490535736084, "learning_rate": 2.5238237135937314e-06, "loss": 0.9163, "step": 8756 }, { "epoch": 0.6750693802035153, "grad_norm": 3.4476709365844727, "learning_rate": 2.5227391911261617e-06, "loss": 0.7989, "step": 8757 }, { "epoch": 0.6751464693185322, "grad_norm": 3.477334976196289, "learning_rate": 2.5216548231029726e-06, "loss": 0.8753, "step": 8758 }, { "epoch": 0.6752235584335492, "grad_norm": 3.5356128215789795, "learning_rate": 2.5205706095917703e-06, "loss": 0.9253, "step": 8759 }, { "epoch": 0.6753006475485661, "grad_norm": 3.273407459259033, "learning_rate": 2.5194865506601507e-06, "loss": 0.9407, "step": 8760 }, { "epoch": 0.6753777366635831, "grad_norm": 3.790802001953125, "learning_rate": 2.518402646375696e-06, "loss": 0.9698, "step": 8761 }, { "epoch": 0.6754548257786, "grad_norm": 3.5114316940307617, "learning_rate": 2.5173188968059836e-06, "loss": 0.9552, "step": 8762 }, { "epoch": 0.675531914893617, "grad_norm": 3.583873987197876, "learning_rate": 2.5162353020185798e-06, "loss": 0.9253, "step": 8763 }, { "epoch": 0.675609004008634, "grad_norm": 3.8350162506103516, 
"learning_rate": 2.5151518620810413e-06, "loss": 0.8797, "step": 8764 }, { "epoch": 0.6756860931236509, "grad_norm": 3.6162362098693848, "learning_rate": 2.514068577060917e-06, "loss": 0.9368, "step": 8765 }, { "epoch": 0.6757631822386679, "grad_norm": 3.375894546508789, "learning_rate": 2.51298544702574e-06, "loss": 0.8251, "step": 8766 }, { "epoch": 0.6758402713536849, "grad_norm": 3.4712395668029785, "learning_rate": 2.5119024720430396e-06, "loss": 0.8507, "step": 8767 }, { "epoch": 0.6759173604687018, "grad_norm": 3.914985179901123, "learning_rate": 2.5108196521803336e-06, "loss": 0.9589, "step": 8768 }, { "epoch": 0.6759944495837188, "grad_norm": 4.005273342132568, "learning_rate": 2.50973698750513e-06, "loss": 0.8717, "step": 8769 }, { "epoch": 0.6760715386987357, "grad_norm": 3.7250683307647705, "learning_rate": 2.508654478084929e-06, "loss": 0.9127, "step": 8770 }, { "epoch": 0.6761486278137527, "grad_norm": 3.753918170928955, "learning_rate": 2.5075721239872196e-06, "loss": 0.8688, "step": 8771 }, { "epoch": 0.6762257169287696, "grad_norm": 3.8328804969787598, "learning_rate": 2.5064899252794783e-06, "loss": 0.9741, "step": 8772 }, { "epoch": 0.6763028060437866, "grad_norm": 3.605774402618408, "learning_rate": 2.5054078820291754e-06, "loss": 0.9508, "step": 8773 }, { "epoch": 0.6763798951588036, "grad_norm": 3.709643840789795, "learning_rate": 2.5043259943037714e-06, "loss": 0.9305, "step": 8774 }, { "epoch": 0.6764569842738205, "grad_norm": 3.6249752044677734, "learning_rate": 2.5032442621707164e-06, "loss": 0.9045, "step": 8775 }, { "epoch": 0.6765340733888375, "grad_norm": 3.642636299133301, "learning_rate": 2.5021626856974524e-06, "loss": 0.9249, "step": 8776 }, { "epoch": 0.6766111625038544, "grad_norm": 3.822664976119995, "learning_rate": 2.501081264951406e-06, "loss": 1.0165, "step": 8777 }, { "epoch": 0.6766882516188714, "grad_norm": 3.8768982887268066, "learning_rate": 2.5000000000000015e-06, "loss": 0.9671, "step": 8778 }, { "epoch": 0.6767653407338884, "grad_norm": 3.6057324409484863, "learning_rate": 2.498918890910648e-06, "loss": 0.8731, "step": 8779 }, { "epoch": 0.6768424298489053, "grad_norm": 3.6541295051574707, "learning_rate": 2.497837937750749e-06, "loss": 0.9921, "step": 8780 }, { "epoch": 0.6769195189639223, "grad_norm": 3.908982038497925, "learning_rate": 2.496757140587695e-06, "loss": 0.9808, "step": 8781 }, { "epoch": 0.6769966080789392, "grad_norm": 3.8420374393463135, "learning_rate": 2.495676499488871e-06, "loss": 1.0192, "step": 8782 }, { "epoch": 0.6770736971939562, "grad_norm": 3.714198350906372, "learning_rate": 2.494596014521645e-06, "loss": 0.9404, "step": 8783 }, { "epoch": 0.6771507863089732, "grad_norm": 3.6355252265930176, "learning_rate": 2.4935156857533816e-06, "loss": 0.9077, "step": 8784 }, { "epoch": 0.6772278754239901, "grad_norm": 3.312899112701416, "learning_rate": 2.4924355132514337e-06, "loss": 0.8434, "step": 8785 }, { "epoch": 0.6773049645390071, "grad_norm": 3.788271188735962, "learning_rate": 2.491355497083145e-06, "loss": 0.909, "step": 8786 }, { "epoch": 0.677382053654024, "grad_norm": 3.835744619369507, "learning_rate": 2.490275637315848e-06, "loss": 0.9578, "step": 8787 }, { "epoch": 0.677459142769041, "grad_norm": 3.8753020763397217, "learning_rate": 2.489195934016867e-06, "loss": 1.0083, "step": 8788 }, { "epoch": 0.677536231884058, "grad_norm": 3.9721779823303223, "learning_rate": 2.488116387253516e-06, "loss": 0.9055, "step": 8789 }, { "epoch": 0.6776133209990749, "grad_norm": 4.252837181091309, "learning_rate": 
2.487036997093098e-06, "loss": 0.9204, "step": 8790 }, { "epoch": 0.6776904101140919, "grad_norm": 3.932285785675049, "learning_rate": 2.485957763602911e-06, "loss": 0.9707, "step": 8791 }, { "epoch": 0.6777674992291088, "grad_norm": 3.8516898155212402, "learning_rate": 2.4848786868502344e-06, "loss": 0.9746, "step": 8792 }, { "epoch": 0.6778445883441259, "grad_norm": 3.652773141860962, "learning_rate": 2.483799766902346e-06, "loss": 0.8415, "step": 8793 }, { "epoch": 0.6779216774591428, "grad_norm": 3.671022891998291, "learning_rate": 2.48272100382651e-06, "loss": 0.9036, "step": 8794 }, { "epoch": 0.6779987665741597, "grad_norm": 3.8974409103393555, "learning_rate": 2.481642397689982e-06, "loss": 1.0436, "step": 8795 }, { "epoch": 0.6780758556891767, "grad_norm": 4.345669269561768, "learning_rate": 2.4805639485600087e-06, "loss": 1.0052, "step": 8796 }, { "epoch": 0.6781529448041936, "grad_norm": 3.7597620487213135, "learning_rate": 2.4794856565038256e-06, "loss": 0.8635, "step": 8797 }, { "epoch": 0.6782300339192107, "grad_norm": 3.701038360595703, "learning_rate": 2.478407521588656e-06, "loss": 0.8971, "step": 8798 }, { "epoch": 0.6783071230342276, "grad_norm": 3.521311044692993, "learning_rate": 2.4773295438817178e-06, "loss": 0.9003, "step": 8799 }, { "epoch": 0.6783842121492445, "grad_norm": 3.864536762237549, "learning_rate": 2.476251723450217e-06, "loss": 1.0305, "step": 8800 }, { "epoch": 0.6784613012642615, "grad_norm": 3.7340474128723145, "learning_rate": 2.475174060361351e-06, "loss": 0.991, "step": 8801 }, { "epoch": 0.6785383903792784, "grad_norm": 3.8703043460845947, "learning_rate": 2.4740965546823076e-06, "loss": 0.9927, "step": 8802 }, { "epoch": 0.6786154794942955, "grad_norm": 3.6807234287261963, "learning_rate": 2.47301920648026e-06, "loss": 0.9202, "step": 8803 }, { "epoch": 0.6786925686093124, "grad_norm": 3.687746047973633, "learning_rate": 2.471942015822377e-06, "loss": 0.9262, "step": 8804 }, { "epoch": 0.6787696577243293, "grad_norm": 3.667069435119629, "learning_rate": 2.4708649827758167e-06, "loss": 0.9589, "step": 8805 }, { "epoch": 0.6788467468393463, "grad_norm": 3.750629425048828, "learning_rate": 2.4697881074077263e-06, "loss": 0.8988, "step": 8806 }, { "epoch": 0.6789238359543632, "grad_norm": 3.8485107421875, "learning_rate": 2.468711389785242e-06, "loss": 1.0432, "step": 8807 }, { "epoch": 0.6790009250693803, "grad_norm": 3.7426390647888184, "learning_rate": 2.4676348299754956e-06, "loss": 0.9872, "step": 8808 }, { "epoch": 0.6790780141843972, "grad_norm": 3.8060035705566406, "learning_rate": 2.4665584280456002e-06, "loss": 0.9285, "step": 8809 }, { "epoch": 0.6791551032994141, "grad_norm": 3.9451937675476074, "learning_rate": 2.4654821840626653e-06, "loss": 1.0148, "step": 8810 }, { "epoch": 0.6792321924144311, "grad_norm": 3.2711398601531982, "learning_rate": 2.4644060980937894e-06, "loss": 0.8548, "step": 8811 }, { "epoch": 0.679309281529448, "grad_norm": 3.3944523334503174, "learning_rate": 2.4633301702060613e-06, "loss": 0.889, "step": 8812 }, { "epoch": 0.679386370644465, "grad_norm": 3.9511239528656006, "learning_rate": 2.4622544004665617e-06, "loss": 0.8868, "step": 8813 }, { "epoch": 0.679463459759482, "grad_norm": 3.569582462310791, "learning_rate": 2.4611787889423546e-06, "loss": 0.8662, "step": 8814 }, { "epoch": 0.6795405488744989, "grad_norm": 3.395447254180908, "learning_rate": 2.460103335700501e-06, "loss": 0.8403, "step": 8815 }, { "epoch": 0.6796176379895159, "grad_norm": 3.699881076812744, "learning_rate": 2.45902804080805e-06, 
"loss": 0.8871, "step": 8816 }, { "epoch": 0.6796947271045328, "grad_norm": 3.846745491027832, "learning_rate": 2.457952904332041e-06, "loss": 0.8642, "step": 8817 }, { "epoch": 0.6797718162195499, "grad_norm": 3.8669497966766357, "learning_rate": 2.4568779263395026e-06, "loss": 0.9709, "step": 8818 }, { "epoch": 0.6798489053345668, "grad_norm": 3.6306495666503906, "learning_rate": 2.455803106897456e-06, "loss": 0.9367, "step": 8819 }, { "epoch": 0.6799259944495837, "grad_norm": 3.4192559719085693, "learning_rate": 2.454728446072907e-06, "loss": 0.7778, "step": 8820 }, { "epoch": 0.6800030835646007, "grad_norm": 3.654353141784668, "learning_rate": 2.4536539439328575e-06, "loss": 0.9357, "step": 8821 }, { "epoch": 0.6800801726796176, "grad_norm": 3.7927420139312744, "learning_rate": 2.4525796005442963e-06, "loss": 0.9209, "step": 8822 }, { "epoch": 0.6801572617946346, "grad_norm": 3.8942689895629883, "learning_rate": 2.451505415974204e-06, "loss": 0.824, "step": 8823 }, { "epoch": 0.6802343509096516, "grad_norm": 4.133578777313232, "learning_rate": 2.4504313902895523e-06, "loss": 0.9132, "step": 8824 }, { "epoch": 0.6803114400246685, "grad_norm": 4.291555404663086, "learning_rate": 2.449357523557296e-06, "loss": 0.9479, "step": 8825 }, { "epoch": 0.6803885291396855, "grad_norm": 3.8000755310058594, "learning_rate": 2.4482838158443883e-06, "loss": 0.8301, "step": 8826 }, { "epoch": 0.6804656182547024, "grad_norm": 3.7706243991851807, "learning_rate": 2.4472102672177693e-06, "loss": 1.0852, "step": 8827 }, { "epoch": 0.6805427073697194, "grad_norm": 3.8600974082946777, "learning_rate": 2.4461368777443683e-06, "loss": 0.9142, "step": 8828 }, { "epoch": 0.6806197964847364, "grad_norm": 3.7134435176849365, "learning_rate": 2.4450636474911072e-06, "loss": 0.9089, "step": 8829 }, { "epoch": 0.6806968855997533, "grad_norm": 3.886906862258911, "learning_rate": 2.4439905765248944e-06, "loss": 0.9055, "step": 8830 }, { "epoch": 0.6807739747147703, "grad_norm": 3.8458032608032227, "learning_rate": 2.442917664912632e-06, "loss": 0.9037, "step": 8831 }, { "epoch": 0.6808510638297872, "grad_norm": 3.480123281478882, "learning_rate": 2.441844912721209e-06, "loss": 0.8902, "step": 8832 }, { "epoch": 0.6809281529448042, "grad_norm": 3.9139695167541504, "learning_rate": 2.4407723200175064e-06, "loss": 0.9043, "step": 8833 }, { "epoch": 0.6810052420598212, "grad_norm": 3.7381532192230225, "learning_rate": 2.439699886868398e-06, "loss": 1.0108, "step": 8834 }, { "epoch": 0.6810823311748381, "grad_norm": 3.5520753860473633, "learning_rate": 2.438627613340739e-06, "loss": 0.9035, "step": 8835 }, { "epoch": 0.6811594202898551, "grad_norm": 4.716245651245117, "learning_rate": 2.4375554995013827e-06, "loss": 0.9881, "step": 8836 }, { "epoch": 0.681236509404872, "grad_norm": 3.765180826187134, "learning_rate": 2.43648354541717e-06, "loss": 0.9127, "step": 8837 }, { "epoch": 0.681313598519889, "grad_norm": 3.5906150341033936, "learning_rate": 2.435411751154931e-06, "loss": 0.9026, "step": 8838 }, { "epoch": 0.681390687634906, "grad_norm": 3.9749667644500732, "learning_rate": 2.4343401167814895e-06, "loss": 0.8457, "step": 8839 }, { "epoch": 0.6814677767499229, "grad_norm": 4.670644760131836, "learning_rate": 2.4332686423636516e-06, "loss": 0.9388, "step": 8840 }, { "epoch": 0.6815448658649399, "grad_norm": 3.8460373878479004, "learning_rate": 2.43219732796822e-06, "loss": 0.8584, "step": 8841 }, { "epoch": 0.6816219549799568, "grad_norm": 3.915990114212036, "learning_rate": 2.431126173661986e-06, "loss": 0.9734, 
"step": 8842 }, { "epoch": 0.6816990440949738, "grad_norm": 3.3147318363189697, "learning_rate": 2.430055179511731e-06, "loss": 0.8453, "step": 8843 }, { "epoch": 0.6817761332099908, "grad_norm": 3.620931386947632, "learning_rate": 2.4289843455842255e-06, "loss": 0.8603, "step": 8844 }, { "epoch": 0.6818532223250077, "grad_norm": 5.175318717956543, "learning_rate": 2.4279136719462326e-06, "loss": 0.8268, "step": 8845 }, { "epoch": 0.6819303114400247, "grad_norm": 3.6000447273254395, "learning_rate": 2.4268431586644985e-06, "loss": 0.8702, "step": 8846 }, { "epoch": 0.6820074005550416, "grad_norm": 3.783622980117798, "learning_rate": 2.4257728058057673e-06, "loss": 0.97, "step": 8847 }, { "epoch": 0.6820844896700586, "grad_norm": 3.600386381149292, "learning_rate": 2.424702613436769e-06, "loss": 1.039, "step": 8848 }, { "epoch": 0.6821615787850756, "grad_norm": 3.5093846321105957, "learning_rate": 2.423632581624225e-06, "loss": 0.9421, "step": 8849 }, { "epoch": 0.6822386679000925, "grad_norm": 3.684954881668091, "learning_rate": 2.422562710434848e-06, "loss": 0.8655, "step": 8850 }, { "epoch": 0.6823157570151095, "grad_norm": 3.958432912826538, "learning_rate": 2.4214929999353354e-06, "loss": 0.8486, "step": 8851 }, { "epoch": 0.6823928461301264, "grad_norm": 4.018614768981934, "learning_rate": 2.4204234501923797e-06, "loss": 0.9712, "step": 8852 }, { "epoch": 0.6824699352451434, "grad_norm": 3.787519931793213, "learning_rate": 2.419354061272662e-06, "loss": 0.9451, "step": 8853 }, { "epoch": 0.6825470243601603, "grad_norm": 3.5393474102020264, "learning_rate": 2.4182848332428534e-06, "loss": 0.9476, "step": 8854 }, { "epoch": 0.6826241134751773, "grad_norm": 3.7970054149627686, "learning_rate": 2.417215766169614e-06, "loss": 0.9967, "step": 8855 }, { "epoch": 0.6827012025901943, "grad_norm": 3.7069692611694336, "learning_rate": 2.4161468601195966e-06, "loss": 0.9612, "step": 8856 }, { "epoch": 0.6827782917052112, "grad_norm": 3.631960868835449, "learning_rate": 2.4150781151594386e-06, "loss": 0.9315, "step": 8857 }, { "epoch": 0.6828553808202282, "grad_norm": 3.616215229034424, "learning_rate": 2.4140095313557734e-06, "loss": 0.9053, "step": 8858 }, { "epoch": 0.6829324699352451, "grad_norm": 3.7601921558380127, "learning_rate": 2.4129411087752196e-06, "loss": 1.0008, "step": 8859 }, { "epoch": 0.6830095590502621, "grad_norm": 3.73475980758667, "learning_rate": 2.4118728474843894e-06, "loss": 0.9158, "step": 8860 }, { "epoch": 0.6830866481652791, "grad_norm": 3.5904901027679443, "learning_rate": 2.4108047475498847e-06, "loss": 0.9244, "step": 8861 }, { "epoch": 0.683163737280296, "grad_norm": 3.4571268558502197, "learning_rate": 2.4097368090382922e-06, "loss": 0.8297, "step": 8862 }, { "epoch": 0.683240826395313, "grad_norm": 3.620143413543701, "learning_rate": 2.4086690320161947e-06, "loss": 0.8506, "step": 8863 }, { "epoch": 0.6833179155103299, "grad_norm": 3.556610107421875, "learning_rate": 2.407601416550162e-06, "loss": 0.9505, "step": 8864 }, { "epoch": 0.6833950046253469, "grad_norm": 3.6465303897857666, "learning_rate": 2.4065339627067545e-06, "loss": 0.9715, "step": 8865 }, { "epoch": 0.6834720937403639, "grad_norm": 3.4857358932495117, "learning_rate": 2.4054666705525227e-06, "loss": 0.7897, "step": 8866 }, { "epoch": 0.6835491828553808, "grad_norm": 3.4617760181427, "learning_rate": 2.404399540154008e-06, "loss": 0.8728, "step": 8867 }, { "epoch": 0.6836262719703978, "grad_norm": 3.6965367794036865, "learning_rate": 2.403332571577738e-06, "loss": 0.8499, "step": 8868 }, { 
"epoch": 0.6837033610854147, "grad_norm": 4.105483055114746, "learning_rate": 2.402265764890233e-06, "loss": 0.9697, "step": 8869 }, { "epoch": 0.6837804502004317, "grad_norm": 4.040141582489014, "learning_rate": 2.4011991201580036e-06, "loss": 0.8711, "step": 8870 }, { "epoch": 0.6838575393154487, "grad_norm": 3.5217995643615723, "learning_rate": 2.4001326374475496e-06, "loss": 0.8271, "step": 8871 }, { "epoch": 0.6839346284304656, "grad_norm": 3.501316785812378, "learning_rate": 2.3990663168253627e-06, "loss": 0.9758, "step": 8872 }, { "epoch": 0.6840117175454826, "grad_norm": 3.6326067447662354, "learning_rate": 2.398000158357918e-06, "loss": 0.909, "step": 8873 }, { "epoch": 0.6840888066604995, "grad_norm": 3.510653257369995, "learning_rate": 2.396934162111686e-06, "loss": 0.9052, "step": 8874 }, { "epoch": 0.6841658957755165, "grad_norm": 3.3554039001464844, "learning_rate": 2.395868328153129e-06, "loss": 0.9057, "step": 8875 }, { "epoch": 0.6842429848905335, "grad_norm": 3.7715797424316406, "learning_rate": 2.3948026565486963e-06, "loss": 0.9396, "step": 8876 }, { "epoch": 0.6843200740055504, "grad_norm": 3.4584174156188965, "learning_rate": 2.3937371473648237e-06, "loss": 0.8345, "step": 8877 }, { "epoch": 0.6843971631205674, "grad_norm": 3.387113571166992, "learning_rate": 2.3926718006679416e-06, "loss": 0.8837, "step": 8878 }, { "epoch": 0.6844742522355843, "grad_norm": 3.546274423599243, "learning_rate": 2.391606616524469e-06, "loss": 0.8913, "step": 8879 }, { "epoch": 0.6845513413506013, "grad_norm": 3.3328139781951904, "learning_rate": 2.390541595000815e-06, "loss": 0.8801, "step": 8880 }, { "epoch": 0.6846284304656183, "grad_norm": 3.5848608016967773, "learning_rate": 2.3894767361633775e-06, "loss": 0.8967, "step": 8881 }, { "epoch": 0.6847055195806352, "grad_norm": 3.5277299880981445, "learning_rate": 2.388412040078547e-06, "loss": 0.8776, "step": 8882 }, { "epoch": 0.6847826086956522, "grad_norm": 3.7416014671325684, "learning_rate": 2.387347506812698e-06, "loss": 0.811, "step": 8883 }, { "epoch": 0.6848596978106691, "grad_norm": 3.776992082595825, "learning_rate": 2.3862831364322013e-06, "loss": 0.966, "step": 8884 }, { "epoch": 0.684936786925686, "grad_norm": 3.511543035507202, "learning_rate": 2.385218929003414e-06, "loss": 0.9115, "step": 8885 }, { "epoch": 0.6850138760407031, "grad_norm": 3.6545498371124268, "learning_rate": 2.3841548845926844e-06, "loss": 0.9297, "step": 8886 }, { "epoch": 0.68509096515572, "grad_norm": 4.169460773468018, "learning_rate": 2.3830910032663514e-06, "loss": 0.9614, "step": 8887 }, { "epoch": 0.685168054270737, "grad_norm": 3.843756675720215, "learning_rate": 2.3820272850907393e-06, "loss": 0.8832, "step": 8888 }, { "epoch": 0.6852451433857539, "grad_norm": 3.783128499984741, "learning_rate": 2.380963730132167e-06, "loss": 0.8474, "step": 8889 }, { "epoch": 0.6853222325007708, "grad_norm": 3.7089338302612305, "learning_rate": 2.3799003384569417e-06, "loss": 0.941, "step": 8890 }, { "epoch": 0.6853993216157879, "grad_norm": 3.5771591663360596, "learning_rate": 2.3788371101313614e-06, "loss": 0.9073, "step": 8891 }, { "epoch": 0.6854764107308048, "grad_norm": 3.6812937259674072, "learning_rate": 2.3777740452217106e-06, "loss": 0.9433, "step": 8892 }, { "epoch": 0.6855534998458218, "grad_norm": 3.610495090484619, "learning_rate": 2.3767111437942703e-06, "loss": 0.9288, "step": 8893 }, { "epoch": 0.6856305889608387, "grad_norm": 3.8256382942199707, "learning_rate": 2.375648405915301e-06, "loss": 0.8738, "step": 8894 }, { "epoch": 
0.6857076780758556, "grad_norm": 3.9262545108795166, "learning_rate": 2.374585831651062e-06, "loss": 0.8454, "step": 8895 }, { "epoch": 0.6857847671908727, "grad_norm": 3.498192071914673, "learning_rate": 2.3735234210677994e-06, "loss": 0.8577, "step": 8896 }, { "epoch": 0.6858618563058896, "grad_norm": 3.9072468280792236, "learning_rate": 2.3724611742317483e-06, "loss": 0.906, "step": 8897 }, { "epoch": 0.6859389454209066, "grad_norm": 3.8992998600006104, "learning_rate": 2.3713990912091367e-06, "loss": 0.8959, "step": 8898 }, { "epoch": 0.6860160345359235, "grad_norm": 3.6476802825927734, "learning_rate": 2.3703371720661754e-06, "loss": 0.8936, "step": 8899 }, { "epoch": 0.6860931236509404, "grad_norm": 3.575178384780884, "learning_rate": 2.369275416869073e-06, "loss": 0.9295, "step": 8900 }, { "epoch": 0.6861702127659575, "grad_norm": 4.027018070220947, "learning_rate": 2.3682138256840224e-06, "loss": 0.9805, "step": 8901 }, { "epoch": 0.6862473018809744, "grad_norm": 3.9200379848480225, "learning_rate": 2.3671523985772105e-06, "loss": 0.9032, "step": 8902 }, { "epoch": 0.6863243909959914, "grad_norm": 3.6151862144470215, "learning_rate": 2.36609113561481e-06, "loss": 0.8806, "step": 8903 }, { "epoch": 0.6864014801110083, "grad_norm": 4.015349388122559, "learning_rate": 2.365030036862988e-06, "loss": 0.9448, "step": 8904 }, { "epoch": 0.6864785692260252, "grad_norm": 3.450439691543579, "learning_rate": 2.363969102387894e-06, "loss": 0.9339, "step": 8905 }, { "epoch": 0.6865556583410423, "grad_norm": 3.489480972290039, "learning_rate": 2.3629083322556744e-06, "loss": 0.8266, "step": 8906 }, { "epoch": 0.6866327474560592, "grad_norm": 3.841921091079712, "learning_rate": 2.361847726532463e-06, "loss": 0.9075, "step": 8907 }, { "epoch": 0.6867098365710762, "grad_norm": 3.6356115341186523, "learning_rate": 2.3607872852843814e-06, "loss": 0.9278, "step": 8908 }, { "epoch": 0.6867869256860931, "grad_norm": 3.600667715072632, "learning_rate": 2.3597270085775466e-06, "loss": 0.9311, "step": 8909 }, { "epoch": 0.68686401480111, "grad_norm": 3.5631704330444336, "learning_rate": 2.3586668964780562e-06, "loss": 0.8496, "step": 8910 }, { "epoch": 0.6869411039161271, "grad_norm": 3.3869268894195557, "learning_rate": 2.3576069490520046e-06, "loss": 0.94, "step": 8911 }, { "epoch": 0.687018193031144, "grad_norm": 4.510827541351318, "learning_rate": 2.3565471663654752e-06, "loss": 0.8451, "step": 8912 }, { "epoch": 0.687095282146161, "grad_norm": 3.5590405464172363, "learning_rate": 2.355487548484539e-06, "loss": 0.9092, "step": 8913 }, { "epoch": 0.6871723712611779, "grad_norm": 3.764672040939331, "learning_rate": 2.3544280954752573e-06, "loss": 0.9336, "step": 8914 }, { "epoch": 0.6872494603761948, "grad_norm": 3.8435845375061035, "learning_rate": 2.3533688074036847e-06, "loss": 0.9875, "step": 8915 }, { "epoch": 0.6873265494912119, "grad_norm": 3.645751714706421, "learning_rate": 2.3523096843358573e-06, "loss": 0.8304, "step": 8916 }, { "epoch": 0.6874036386062288, "grad_norm": 3.7767014503479004, "learning_rate": 2.3512507263378086e-06, "loss": 0.8513, "step": 8917 }, { "epoch": 0.6874807277212458, "grad_norm": 3.934950590133667, "learning_rate": 2.3501919334755584e-06, "loss": 0.8667, "step": 8918 }, { "epoch": 0.6875578168362627, "grad_norm": 3.6691348552703857, "learning_rate": 2.3491333058151185e-06, "loss": 1.0102, "step": 8919 }, { "epoch": 0.6876349059512796, "grad_norm": 3.4543542861938477, "learning_rate": 2.3480748434224872e-06, "loss": 0.8902, "step": 8920 }, { "epoch": 0.6877119950662967, 
"grad_norm": 4.049129486083984, "learning_rate": 2.347016546363654e-06, "loss": 0.821, "step": 8921 }, { "epoch": 0.6877890841813136, "grad_norm": 3.604243755340576, "learning_rate": 2.3459584147046e-06, "loss": 0.8626, "step": 8922 }, { "epoch": 0.6878661732963306, "grad_norm": 3.946105480194092, "learning_rate": 2.344900448511293e-06, "loss": 0.9558, "step": 8923 }, { "epoch": 0.6879432624113475, "grad_norm": 3.4957942962646484, "learning_rate": 2.3438426478496936e-06, "loss": 0.8554, "step": 8924 }, { "epoch": 0.6880203515263644, "grad_norm": 3.9918699264526367, "learning_rate": 2.3427850127857464e-06, "loss": 1.0795, "step": 8925 }, { "epoch": 0.6880974406413815, "grad_norm": 3.549349069595337, "learning_rate": 2.341727543385392e-06, "loss": 0.8196, "step": 8926 }, { "epoch": 0.6881745297563984, "grad_norm": 3.535104751586914, "learning_rate": 2.3406702397145574e-06, "loss": 0.8999, "step": 8927 }, { "epoch": 0.6882516188714154, "grad_norm": 3.535996913909912, "learning_rate": 2.3396131018391606e-06, "loss": 0.8936, "step": 8928 }, { "epoch": 0.6883287079864323, "grad_norm": 4.0836262702941895, "learning_rate": 2.3385561298251087e-06, "loss": 0.9796, "step": 8929 }, { "epoch": 0.6884057971014492, "grad_norm": 3.8373196125030518, "learning_rate": 2.337499323738299e-06, "loss": 0.9182, "step": 8930 }, { "epoch": 0.6884828862164662, "grad_norm": 3.3470261096954346, "learning_rate": 2.3364426836446163e-06, "loss": 0.8113, "step": 8931 }, { "epoch": 0.6885599753314832, "grad_norm": 3.9608960151672363, "learning_rate": 2.3353862096099365e-06, "loss": 0.9545, "step": 8932 }, { "epoch": 0.6886370644465002, "grad_norm": 3.7849230766296387, "learning_rate": 2.3343299017001263e-06, "loss": 0.8956, "step": 8933 }, { "epoch": 0.6887141535615171, "grad_norm": 4.301094055175781, "learning_rate": 2.333273759981041e-06, "loss": 0.9049, "step": 8934 }, { "epoch": 0.688791242676534, "grad_norm": 3.382276773452759, "learning_rate": 2.3322177845185278e-06, "loss": 0.8616, "step": 8935 }, { "epoch": 0.688868331791551, "grad_norm": 3.7838985919952393, "learning_rate": 2.3311619753784167e-06, "loss": 0.9545, "step": 8936 }, { "epoch": 0.688945420906568, "grad_norm": 3.419304609298706, "learning_rate": 2.3301063326265338e-06, "loss": 0.9138, "step": 8937 }, { "epoch": 0.689022510021585, "grad_norm": 3.7308456897735596, "learning_rate": 2.329050856328694e-06, "loss": 0.8699, "step": 8938 }, { "epoch": 0.6890995991366019, "grad_norm": 3.8383610248565674, "learning_rate": 2.3279955465506995e-06, "loss": 0.9504, "step": 8939 }, { "epoch": 0.6891766882516188, "grad_norm": 3.7427611351013184, "learning_rate": 2.3269404033583443e-06, "loss": 0.9492, "step": 8940 }, { "epoch": 0.6892537773666358, "grad_norm": 3.5722649097442627, "learning_rate": 2.3258854268174125e-06, "loss": 0.9321, "step": 8941 }, { "epoch": 0.6893308664816528, "grad_norm": 4.43306827545166, "learning_rate": 2.324830616993673e-06, "loss": 1.0846, "step": 8942 }, { "epoch": 0.6894079555966698, "grad_norm": 3.7848153114318848, "learning_rate": 2.3237759739528893e-06, "loss": 0.8663, "step": 8943 }, { "epoch": 0.6894850447116867, "grad_norm": 4.198482513427734, "learning_rate": 2.3227214977608136e-06, "loss": 0.9388, "step": 8944 }, { "epoch": 0.6895621338267036, "grad_norm": 3.4994449615478516, "learning_rate": 2.321667188483186e-06, "loss": 0.8521, "step": 8945 }, { "epoch": 0.6896392229417206, "grad_norm": 3.8813703060150146, "learning_rate": 2.3206130461857403e-06, "loss": 1.072, "step": 8946 }, { "epoch": 0.6897163120567376, "grad_norm": 
3.928469181060791, "learning_rate": 2.3195590709341915e-06, "loss": 0.8955, "step": 8947 }, { "epoch": 0.6897934011717546, "grad_norm": 3.67775821685791, "learning_rate": 2.3185052627942528e-06, "loss": 0.9373, "step": 8948 }, { "epoch": 0.6898704902867715, "grad_norm": 3.703199863433838, "learning_rate": 2.317451621831623e-06, "loss": 0.9561, "step": 8949 }, { "epoch": 0.6899475794017884, "grad_norm": 3.4708447456359863, "learning_rate": 2.3163981481119913e-06, "loss": 0.8132, "step": 8950 }, { "epoch": 0.6900246685168054, "grad_norm": 3.7089951038360596, "learning_rate": 2.3153448417010367e-06, "loss": 0.9367, "step": 8951 }, { "epoch": 0.6901017576318224, "grad_norm": 3.567253351211548, "learning_rate": 2.314291702664428e-06, "loss": 0.9247, "step": 8952 }, { "epoch": 0.6901788467468394, "grad_norm": 4.459877967834473, "learning_rate": 2.3132387310678212e-06, "loss": 0.9534, "step": 8953 }, { "epoch": 0.6902559358618563, "grad_norm": 3.3123741149902344, "learning_rate": 2.3121859269768637e-06, "loss": 0.8037, "step": 8954 }, { "epoch": 0.6903330249768732, "grad_norm": 3.7758708000183105, "learning_rate": 2.3111332904571933e-06, "loss": 0.8445, "step": 8955 }, { "epoch": 0.6904101140918902, "grad_norm": 3.962944507598877, "learning_rate": 2.3100808215744365e-06, "loss": 0.9568, "step": 8956 }, { "epoch": 0.6904872032069072, "grad_norm": 3.808398723602295, "learning_rate": 2.3090285203942104e-06, "loss": 0.9115, "step": 8957 }, { "epoch": 0.6905642923219242, "grad_norm": 3.7572476863861084, "learning_rate": 2.3079763869821176e-06, "loss": 0.9322, "step": 8958 }, { "epoch": 0.6906413814369411, "grad_norm": 3.5704262256622314, "learning_rate": 2.3069244214037546e-06, "loss": 0.8228, "step": 8959 }, { "epoch": 0.690718470551958, "grad_norm": 3.9301810264587402, "learning_rate": 2.3058726237247063e-06, "loss": 0.903, "step": 8960 }, { "epoch": 0.690795559666975, "grad_norm": 3.7818596363067627, "learning_rate": 2.3048209940105465e-06, "loss": 0.8574, "step": 8961 }, { "epoch": 0.690872648781992, "grad_norm": 3.3645167350769043, "learning_rate": 2.3037695323268394e-06, "loss": 0.9248, "step": 8962 }, { "epoch": 0.690949737897009, "grad_norm": 3.7038087844848633, "learning_rate": 2.3027182387391373e-06, "loss": 0.8641, "step": 8963 }, { "epoch": 0.6910268270120259, "grad_norm": 3.863682985305786, "learning_rate": 2.3016671133129837e-06, "loss": 0.9759, "step": 8964 }, { "epoch": 0.6911039161270429, "grad_norm": 4.024723529815674, "learning_rate": 2.3006161561139105e-06, "loss": 0.8978, "step": 8965 }, { "epoch": 0.6911810052420598, "grad_norm": 3.979031562805176, "learning_rate": 2.29956536720744e-06, "loss": 0.95, "step": 8966 }, { "epoch": 0.6912580943570767, "grad_norm": 4.345090389251709, "learning_rate": 2.298514746659084e-06, "loss": 1.0485, "step": 8967 }, { "epoch": 0.6913351834720938, "grad_norm": 3.6718671321868896, "learning_rate": 2.2974642945343405e-06, "loss": 0.9435, "step": 8968 }, { "epoch": 0.6914122725871107, "grad_norm": 4.286196231842041, "learning_rate": 2.2964140108987015e-06, "loss": 0.9947, "step": 8969 }, { "epoch": 0.6914893617021277, "grad_norm": 3.392043352127075, "learning_rate": 2.2953638958176465e-06, "loss": 0.7898, "step": 8970 }, { "epoch": 0.6915664508171446, "grad_norm": 3.7056403160095215, "learning_rate": 2.2943139493566456e-06, "loss": 1.0118, "step": 8971 }, { "epoch": 0.6916435399321615, "grad_norm": 3.6872429847717285, "learning_rate": 2.293264171581158e-06, "loss": 0.999, "step": 8972 }, { "epoch": 0.6917206290471786, "grad_norm": 3.7803478240966797, 
"learning_rate": 2.2922145625566293e-06, "loss": 0.9195, "step": 8973 }, { "epoch": 0.6917977181621955, "grad_norm": 4.061209201812744, "learning_rate": 2.2911651223484987e-06, "loss": 0.9921, "step": 8974 }, { "epoch": 0.6918748072772125, "grad_norm": 3.8408148288726807, "learning_rate": 2.2901158510221934e-06, "loss": 0.9547, "step": 8975 }, { "epoch": 0.6919518963922294, "grad_norm": 4.113273620605469, "learning_rate": 2.2890667486431296e-06, "loss": 0.9756, "step": 8976 }, { "epoch": 0.6920289855072463, "grad_norm": 3.689815044403076, "learning_rate": 2.288017815276714e-06, "loss": 0.8866, "step": 8977 }, { "epoch": 0.6921060746222634, "grad_norm": 3.9249446392059326, "learning_rate": 2.2869690509883435e-06, "loss": 0.9308, "step": 8978 }, { "epoch": 0.6921831637372803, "grad_norm": 3.9732306003570557, "learning_rate": 2.2859204558433994e-06, "loss": 0.8548, "step": 8979 }, { "epoch": 0.6922602528522973, "grad_norm": 3.4670419692993164, "learning_rate": 2.284872029907259e-06, "loss": 0.9897, "step": 8980 }, { "epoch": 0.6923373419673142, "grad_norm": 3.4217910766601562, "learning_rate": 2.283823773245285e-06, "loss": 0.8297, "step": 8981 }, { "epoch": 0.6924144310823311, "grad_norm": 3.647320032119751, "learning_rate": 2.2827756859228316e-06, "loss": 0.8505, "step": 8982 }, { "epoch": 0.6924915201973482, "grad_norm": 3.502896785736084, "learning_rate": 2.281727768005243e-06, "loss": 0.8633, "step": 8983 }, { "epoch": 0.6925686093123651, "grad_norm": 3.6835694313049316, "learning_rate": 2.2806800195578484e-06, "loss": 0.9993, "step": 8984 }, { "epoch": 0.6926456984273821, "grad_norm": 3.4930477142333984, "learning_rate": 2.2796324406459703e-06, "loss": 0.7549, "step": 8985 }, { "epoch": 0.692722787542399, "grad_norm": 3.864713668823242, "learning_rate": 2.2785850313349207e-06, "loss": 0.9116, "step": 8986 }, { "epoch": 0.6927998766574159, "grad_norm": 3.770353317260742, "learning_rate": 2.277537791689999e-06, "loss": 0.9246, "step": 8987 }, { "epoch": 0.692876965772433, "grad_norm": 3.649230718612671, "learning_rate": 2.276490721776497e-06, "loss": 0.9455, "step": 8988 }, { "epoch": 0.6929540548874499, "grad_norm": 3.8266777992248535, "learning_rate": 2.2754438216596946e-06, "loss": 0.9737, "step": 8989 }, { "epoch": 0.6930311440024669, "grad_norm": 3.5182337760925293, "learning_rate": 2.2743970914048575e-06, "loss": 0.9442, "step": 8990 }, { "epoch": 0.6931082331174838, "grad_norm": 4.085440158843994, "learning_rate": 2.273350531077245e-06, "loss": 1.0016, "step": 8991 }, { "epoch": 0.6931853222325007, "grad_norm": 3.814785957336426, "learning_rate": 2.2723041407421055e-06, "loss": 0.9506, "step": 8992 }, { "epoch": 0.6932624113475178, "grad_norm": 3.8445160388946533, "learning_rate": 2.2712579204646755e-06, "loss": 0.9151, "step": 8993 }, { "epoch": 0.6933395004625347, "grad_norm": 3.886320114135742, "learning_rate": 2.270211870310184e-06, "loss": 0.9457, "step": 8994 }, { "epoch": 0.6934165895775517, "grad_norm": 4.0595011711120605, "learning_rate": 2.269165990343842e-06, "loss": 0.9247, "step": 8995 }, { "epoch": 0.6934936786925686, "grad_norm": 3.6124463081359863, "learning_rate": 2.2681202806308572e-06, "loss": 0.8495, "step": 8996 }, { "epoch": 0.6935707678075855, "grad_norm": 3.549788236618042, "learning_rate": 2.2670747412364243e-06, "loss": 0.8262, "step": 8997 }, { "epoch": 0.6936478569226026, "grad_norm": 3.6086161136627197, "learning_rate": 2.266029372225727e-06, "loss": 0.9215, "step": 8998 }, { "epoch": 0.6937249460376195, "grad_norm": 3.8933184146881104, "learning_rate": 
2.2649841736639387e-06, "loss": 0.8623, "step": 8999 }, { "epoch": 0.6938020351526365, "grad_norm": 3.9083821773529053, "learning_rate": 2.2639391456162246e-06, "loss": 1.0115, "step": 9000 }, { "epoch": 0.6938791242676534, "grad_norm": 3.4845428466796875, "learning_rate": 2.262894288147732e-06, "loss": 0.8953, "step": 9001 }, { "epoch": 0.6939562133826703, "grad_norm": 3.57886004447937, "learning_rate": 2.2618496013236046e-06, "loss": 0.8971, "step": 9002 }, { "epoch": 0.6940333024976874, "grad_norm": 3.5042362213134766, "learning_rate": 2.2608050852089738e-06, "loss": 0.8637, "step": 9003 }, { "epoch": 0.6941103916127043, "grad_norm": 3.460904598236084, "learning_rate": 2.2597607398689588e-06, "loss": 0.8448, "step": 9004 }, { "epoch": 0.6941874807277213, "grad_norm": 3.7049005031585693, "learning_rate": 2.2587165653686714e-06, "loss": 0.9066, "step": 9005 }, { "epoch": 0.6942645698427382, "grad_norm": 3.4977948665618896, "learning_rate": 2.257672561773207e-06, "loss": 0.8732, "step": 9006 }, { "epoch": 0.6943416589577551, "grad_norm": 4.0038676261901855, "learning_rate": 2.256628729147654e-06, "loss": 0.9104, "step": 9007 }, { "epoch": 0.6944187480727722, "grad_norm": 3.465859889984131, "learning_rate": 2.255585067557093e-06, "loss": 0.8565, "step": 9008 }, { "epoch": 0.6944958371877891, "grad_norm": 3.844653606414795, "learning_rate": 2.254541577066589e-06, "loss": 0.949, "step": 9009 }, { "epoch": 0.6945729263028061, "grad_norm": 3.570842742919922, "learning_rate": 2.2534982577412013e-06, "loss": 0.9655, "step": 9010 }, { "epoch": 0.694650015417823, "grad_norm": 3.9422519207000732, "learning_rate": 2.2524551096459703e-06, "loss": 0.9421, "step": 9011 }, { "epoch": 0.6947271045328399, "grad_norm": 3.6970112323760986, "learning_rate": 2.251412132845933e-06, "loss": 0.9201, "step": 9012 }, { "epoch": 0.694804193647857, "grad_norm": 3.5936856269836426, "learning_rate": 2.2503693274061145e-06, "loss": 0.8951, "step": 9013 }, { "epoch": 0.6948812827628739, "grad_norm": 3.511066436767578, "learning_rate": 2.2493266933915274e-06, "loss": 0.845, "step": 9014 }, { "epoch": 0.6949583718778909, "grad_norm": 3.7108609676361084, "learning_rate": 2.2482842308671766e-06, "loss": 0.97, "step": 9015 }, { "epoch": 0.6950354609929078, "grad_norm": 3.6902565956115723, "learning_rate": 2.247241939898051e-06, "loss": 0.959, "step": 9016 }, { "epoch": 0.6951125501079247, "grad_norm": 3.712344169616699, "learning_rate": 2.2461998205491335e-06, "loss": 0.9824, "step": 9017 }, { "epoch": 0.6951896392229417, "grad_norm": 3.7560031414031982, "learning_rate": 2.2451578728853944e-06, "loss": 0.8713, "step": 9018 }, { "epoch": 0.6952667283379587, "grad_norm": 3.6209821701049805, "learning_rate": 2.244116096971795e-06, "loss": 0.8807, "step": 9019 }, { "epoch": 0.6953438174529757, "grad_norm": 3.638458490371704, "learning_rate": 2.2430744928732854e-06, "loss": 0.8903, "step": 9020 }, { "epoch": 0.6954209065679926, "grad_norm": 3.5852041244506836, "learning_rate": 2.242033060654801e-06, "loss": 0.9105, "step": 9021 }, { "epoch": 0.6954979956830095, "grad_norm": 3.6906344890594482, "learning_rate": 2.240991800381271e-06, "loss": 0.9511, "step": 9022 }, { "epoch": 0.6955750847980265, "grad_norm": 3.609318971633911, "learning_rate": 2.2399507121176127e-06, "loss": 0.9687, "step": 9023 }, { "epoch": 0.6956521739130435, "grad_norm": 3.8320932388305664, "learning_rate": 2.238909795928734e-06, "loss": 0.9885, "step": 9024 }, { "epoch": 0.6957292630280605, "grad_norm": 3.6839687824249268, "learning_rate": 
2.2378690518795284e-06, "loss": 0.8995, "step": 9025 }, { "epoch": 0.6958063521430774, "grad_norm": 4.18240213394165, "learning_rate": 2.2368284800348845e-06, "loss": 0.9525, "step": 9026 }, { "epoch": 0.6958834412580943, "grad_norm": 3.770507574081421, "learning_rate": 2.2357880804596714e-06, "loss": 0.9616, "step": 9027 }, { "epoch": 0.6959605303731113, "grad_norm": 3.8755156993865967, "learning_rate": 2.2347478532187562e-06, "loss": 0.8957, "step": 9028 }, { "epoch": 0.6960376194881283, "grad_norm": 3.3811306953430176, "learning_rate": 2.2337077983769902e-06, "loss": 0.8254, "step": 9029 }, { "epoch": 0.6961147086031453, "grad_norm": 3.608715295791626, "learning_rate": 2.232667915999216e-06, "loss": 0.8726, "step": 9030 }, { "epoch": 0.6961917977181622, "grad_norm": 3.5603222846984863, "learning_rate": 2.2316282061502666e-06, "loss": 0.9169, "step": 9031 }, { "epoch": 0.6962688868331791, "grad_norm": 3.8038291931152344, "learning_rate": 2.230588668894959e-06, "loss": 0.874, "step": 9032 }, { "epoch": 0.6963459759481961, "grad_norm": 3.717751979827881, "learning_rate": 2.229549304298105e-06, "loss": 0.9489, "step": 9033 }, { "epoch": 0.696423065063213, "grad_norm": 4.030141830444336, "learning_rate": 2.228510112424503e-06, "loss": 0.9211, "step": 9034 }, { "epoch": 0.6965001541782301, "grad_norm": 4.214062213897705, "learning_rate": 2.2274710933389423e-06, "loss": 0.9463, "step": 9035 }, { "epoch": 0.696577243293247, "grad_norm": 3.714287519454956, "learning_rate": 2.226432247106199e-06, "loss": 0.9016, "step": 9036 }, { "epoch": 0.6966543324082639, "grad_norm": 3.579857110977173, "learning_rate": 2.225393573791042e-06, "loss": 0.8215, "step": 9037 }, { "epoch": 0.6967314215232809, "grad_norm": 3.9160430431365967, "learning_rate": 2.2243550734582243e-06, "loss": 0.8289, "step": 9038 }, { "epoch": 0.6968085106382979, "grad_norm": 3.4613399505615234, "learning_rate": 2.223316746172492e-06, "loss": 0.8294, "step": 9039 }, { "epoch": 0.6968855997533149, "grad_norm": 3.514765977859497, "learning_rate": 2.22227859199858e-06, "loss": 0.9109, "step": 9040 }, { "epoch": 0.6969626888683318, "grad_norm": 3.923628807067871, "learning_rate": 2.2212406110012113e-06, "loss": 0.9976, "step": 9041 }, { "epoch": 0.6970397779833487, "grad_norm": 3.940729856491089, "learning_rate": 2.2202028032451013e-06, "loss": 0.9827, "step": 9042 }, { "epoch": 0.6971168670983657, "grad_norm": 3.6197614669799805, "learning_rate": 2.2191651687949473e-06, "loss": 0.991, "step": 9043 }, { "epoch": 0.6971939562133826, "grad_norm": 3.794126510620117, "learning_rate": 2.218127707715443e-06, "loss": 0.9555, "step": 9044 }, { "epoch": 0.6972710453283997, "grad_norm": 3.6639597415924072, "learning_rate": 2.2170904200712684e-06, "loss": 0.968, "step": 9045 }, { "epoch": 0.6973481344434166, "grad_norm": 3.9340016841888428, "learning_rate": 2.216053305927093e-06, "loss": 0.9202, "step": 9046 }, { "epoch": 0.6974252235584335, "grad_norm": 3.675588846206665, "learning_rate": 2.2150163653475752e-06, "loss": 0.7814, "step": 9047 }, { "epoch": 0.6975023126734505, "grad_norm": 3.601801633834839, "learning_rate": 2.2139795983973654e-06, "loss": 0.8856, "step": 9048 }, { "epoch": 0.6975794017884674, "grad_norm": 3.440999746322632, "learning_rate": 2.2129430051410967e-06, "loss": 0.9017, "step": 9049 }, { "epoch": 0.6976564909034845, "grad_norm": 3.5719244480133057, "learning_rate": 2.211906585643397e-06, "loss": 0.8295, "step": 9050 }, { "epoch": 0.6977335800185014, "grad_norm": 4.056939601898193, "learning_rate": 2.210870339968881e-06, 
"loss": 0.9759, "step": 9051 }, { "epoch": 0.6978106691335183, "grad_norm": 3.9863460063934326, "learning_rate": 2.2098342681821555e-06, "loss": 0.9698, "step": 9052 }, { "epoch": 0.6978877582485353, "grad_norm": 3.962463140487671, "learning_rate": 2.2087983703478118e-06, "loss": 0.9996, "step": 9053 }, { "epoch": 0.6979648473635522, "grad_norm": 3.7303974628448486, "learning_rate": 2.207762646530434e-06, "loss": 0.9972, "step": 9054 }, { "epoch": 0.6980419364785693, "grad_norm": 3.459636688232422, "learning_rate": 2.2067270967945936e-06, "loss": 0.8566, "step": 9055 }, { "epoch": 0.6981190255935862, "grad_norm": 3.582228183746338, "learning_rate": 2.2056917212048522e-06, "loss": 0.9435, "step": 9056 }, { "epoch": 0.6981961147086031, "grad_norm": 3.553546905517578, "learning_rate": 2.2046565198257595e-06, "loss": 0.9016, "step": 9057 }, { "epoch": 0.6982732038236201, "grad_norm": 3.933868646621704, "learning_rate": 2.203621492721858e-06, "loss": 0.9479, "step": 9058 }, { "epoch": 0.698350292938637, "grad_norm": 3.995857000350952, "learning_rate": 2.2025866399576713e-06, "loss": 0.98, "step": 9059 }, { "epoch": 0.6984273820536541, "grad_norm": 3.842726469039917, "learning_rate": 2.2015519615977193e-06, "loss": 0.9618, "step": 9060 }, { "epoch": 0.698504471168671, "grad_norm": 3.7340540885925293, "learning_rate": 2.2005174577065085e-06, "loss": 0.9015, "step": 9061 }, { "epoch": 0.6985815602836879, "grad_norm": 3.7171812057495117, "learning_rate": 2.1994831283485363e-06, "loss": 0.9996, "step": 9062 }, { "epoch": 0.6986586493987049, "grad_norm": 3.576258420944214, "learning_rate": 2.198448973588288e-06, "loss": 0.8816, "step": 9063 }, { "epoch": 0.6987357385137218, "grad_norm": 3.6319639682769775, "learning_rate": 2.197414993490235e-06, "loss": 0.9423, "step": 9064 }, { "epoch": 0.6988128276287389, "grad_norm": 3.287196159362793, "learning_rate": 2.1963811881188423e-06, "loss": 0.8847, "step": 9065 }, { "epoch": 0.6988899167437558, "grad_norm": 3.3368561267852783, "learning_rate": 2.1953475575385618e-06, "loss": 0.8119, "step": 9066 }, { "epoch": 0.6989670058587727, "grad_norm": 3.6596055030822754, "learning_rate": 2.1943141018138357e-06, "loss": 1.0, "step": 9067 }, { "epoch": 0.6990440949737897, "grad_norm": 3.594658851623535, "learning_rate": 2.1932808210090963e-06, "loss": 0.867, "step": 9068 }, { "epoch": 0.6991211840888066, "grad_norm": 4.087553977966309, "learning_rate": 2.1922477151887595e-06, "loss": 0.915, "step": 9069 }, { "epoch": 0.6991982732038237, "grad_norm": 3.692249059677124, "learning_rate": 2.1912147844172354e-06, "loss": 0.8633, "step": 9070 }, { "epoch": 0.6992753623188406, "grad_norm": 3.7603821754455566, "learning_rate": 2.1901820287589227e-06, "loss": 0.8733, "step": 9071 }, { "epoch": 0.6993524514338575, "grad_norm": 3.858159303665161, "learning_rate": 2.189149448278208e-06, "loss": 0.7904, "step": 9072 }, { "epoch": 0.6994295405488745, "grad_norm": 3.873694896697998, "learning_rate": 2.188117043039468e-06, "loss": 0.9063, "step": 9073 }, { "epoch": 0.6995066296638914, "grad_norm": 3.4330713748931885, "learning_rate": 2.187084813107069e-06, "loss": 0.9582, "step": 9074 }, { "epoch": 0.6995837187789085, "grad_norm": 3.4164810180664062, "learning_rate": 2.186052758545361e-06, "loss": 0.8979, "step": 9075 }, { "epoch": 0.6996608078939254, "grad_norm": 3.9505198001861572, "learning_rate": 2.18502087941869e-06, "loss": 0.9866, "step": 9076 }, { "epoch": 0.6997378970089423, "grad_norm": 3.860844373703003, "learning_rate": 2.183989175791388e-06, "loss": 0.9133, "step": 
9077 }, { "epoch": 0.6998149861239593, "grad_norm": 3.6044540405273438, "learning_rate": 2.1829576477277765e-06, "loss": 0.9124, "step": 9078 }, { "epoch": 0.6998920752389762, "grad_norm": 3.7315444946289062, "learning_rate": 2.181926295292167e-06, "loss": 0.9567, "step": 9079 }, { "epoch": 0.6999691643539933, "grad_norm": 3.6717987060546875, "learning_rate": 2.180895118548857e-06, "loss": 0.9217, "step": 9080 }, { "epoch": 0.7000462534690102, "grad_norm": 4.249053478240967, "learning_rate": 2.1798641175621354e-06, "loss": 1.0054, "step": 9081 }, { "epoch": 0.7001233425840271, "grad_norm": 3.520657539367676, "learning_rate": 2.17883329239628e-06, "loss": 0.8778, "step": 9082 }, { "epoch": 0.7002004316990441, "grad_norm": 3.7700884342193604, "learning_rate": 2.177802643115558e-06, "loss": 0.9893, "step": 9083 }, { "epoch": 0.700277520814061, "grad_norm": 3.6206729412078857, "learning_rate": 2.1767721697842244e-06, "loss": 0.8638, "step": 9084 }, { "epoch": 0.700354609929078, "grad_norm": 3.6218421459198, "learning_rate": 2.1757418724665263e-06, "loss": 0.9983, "step": 9085 }, { "epoch": 0.700431699044095, "grad_norm": 3.8477892875671387, "learning_rate": 2.1747117512266928e-06, "loss": 1.0336, "step": 9086 }, { "epoch": 0.7005087881591119, "grad_norm": 3.7004141807556152, "learning_rate": 2.1736818061289492e-06, "loss": 0.8383, "step": 9087 }, { "epoch": 0.7005858772741289, "grad_norm": 3.667736291885376, "learning_rate": 2.1726520372375076e-06, "loss": 0.9925, "step": 9088 }, { "epoch": 0.7006629663891458, "grad_norm": 3.574789047241211, "learning_rate": 2.1716224446165678e-06, "loss": 0.7904, "step": 9089 }, { "epoch": 0.7007400555041629, "grad_norm": 3.753038167953491, "learning_rate": 2.1705930283303222e-06, "loss": 0.8674, "step": 9090 }, { "epoch": 0.7008171446191798, "grad_norm": 3.9094438552856445, "learning_rate": 2.1695637884429456e-06, "loss": 0.9338, "step": 9091 }, { "epoch": 0.7008942337341967, "grad_norm": 3.6531260013580322, "learning_rate": 2.1685347250186073e-06, "loss": 0.9804, "step": 9092 }, { "epoch": 0.7009713228492137, "grad_norm": 3.6628949642181396, "learning_rate": 2.1675058381214647e-06, "loss": 0.9991, "step": 9093 }, { "epoch": 0.7010484119642306, "grad_norm": 3.8137435913085938, "learning_rate": 2.166477127815663e-06, "loss": 0.8553, "step": 9094 }, { "epoch": 0.7011255010792476, "grad_norm": 3.7563507556915283, "learning_rate": 2.1654485941653374e-06, "loss": 0.8153, "step": 9095 }, { "epoch": 0.7012025901942646, "grad_norm": 3.9121322631835938, "learning_rate": 2.164420237234611e-06, "loss": 0.9299, "step": 9096 }, { "epoch": 0.7012796793092815, "grad_norm": 3.449474811553955, "learning_rate": 2.163392057087597e-06, "loss": 0.9556, "step": 9097 }, { "epoch": 0.7013567684242985, "grad_norm": 3.6520328521728516, "learning_rate": 2.1623640537883977e-06, "loss": 0.8858, "step": 9098 }, { "epoch": 0.7014338575393154, "grad_norm": 3.4059646129608154, "learning_rate": 2.1613362274011025e-06, "loss": 0.8044, "step": 9099 }, { "epoch": 0.7015109466543324, "grad_norm": 3.759131669998169, "learning_rate": 2.1603085779897935e-06, "loss": 0.8184, "step": 9100 }, { "epoch": 0.7015880357693494, "grad_norm": 3.839765787124634, "learning_rate": 2.1592811056185363e-06, "loss": 0.9364, "step": 9101 }, { "epoch": 0.7016651248843663, "grad_norm": 3.4155290126800537, "learning_rate": 2.1582538103513896e-06, "loss": 0.835, "step": 9102 }, { "epoch": 0.7017422139993833, "grad_norm": 3.8770790100097656, "learning_rate": 2.1572266922524e-06, "loss": 1.0204, "step": 9103 }, { "epoch": 
0.7018193031144002, "grad_norm": 3.4858505725860596, "learning_rate": 2.1561997513856027e-06, "loss": 0.9334, "step": 9104 }, { "epoch": 0.7018963922294172, "grad_norm": 3.834233283996582, "learning_rate": 2.155172987815024e-06, "loss": 0.9136, "step": 9105 }, { "epoch": 0.7019734813444342, "grad_norm": 4.0034050941467285, "learning_rate": 2.1541464016046747e-06, "loss": 1.0584, "step": 9106 }, { "epoch": 0.7020505704594511, "grad_norm": 3.6104769706726074, "learning_rate": 2.153119992818558e-06, "loss": 0.9131, "step": 9107 }, { "epoch": 0.7021276595744681, "grad_norm": 3.7609310150146484, "learning_rate": 2.152093761520665e-06, "loss": 0.9231, "step": 9108 }, { "epoch": 0.702204748689485, "grad_norm": 3.517526388168335, "learning_rate": 2.151067707774977e-06, "loss": 0.9278, "step": 9109 }, { "epoch": 0.702281837804502, "grad_norm": 3.942485809326172, "learning_rate": 2.150041831645462e-06, "loss": 1.0325, "step": 9110 }, { "epoch": 0.702358926919519, "grad_norm": 3.776834487915039, "learning_rate": 2.14901613319608e-06, "loss": 0.9948, "step": 9111 }, { "epoch": 0.7024360160345359, "grad_norm": 3.855884552001953, "learning_rate": 2.147990612490775e-06, "loss": 1.0399, "step": 9112 }, { "epoch": 0.7025131051495529, "grad_norm": 4.162666320800781, "learning_rate": 2.1469652695934847e-06, "loss": 0.8753, "step": 9113 }, { "epoch": 0.7025901942645698, "grad_norm": 3.925414800643921, "learning_rate": 2.145940104568133e-06, "loss": 0.9324, "step": 9114 }, { "epoch": 0.7026672833795868, "grad_norm": 3.718440294265747, "learning_rate": 2.1449151174786343e-06, "loss": 0.8593, "step": 9115 }, { "epoch": 0.7027443724946038, "grad_norm": 3.670015335083008, "learning_rate": 2.143890308388893e-06, "loss": 0.7962, "step": 9116 }, { "epoch": 0.7028214616096207, "grad_norm": 3.8344802856445312, "learning_rate": 2.142865677362797e-06, "loss": 0.9722, "step": 9117 }, { "epoch": 0.7028985507246377, "grad_norm": 3.786147117614746, "learning_rate": 2.141841224464229e-06, "loss": 0.8701, "step": 9118 }, { "epoch": 0.7029756398396546, "grad_norm": 3.831787109375, "learning_rate": 2.1408169497570576e-06, "loss": 0.8794, "step": 9119 }, { "epoch": 0.7030527289546716, "grad_norm": 3.9900169372558594, "learning_rate": 2.139792853305141e-06, "loss": 1.055, "step": 9120 }, { "epoch": 0.7031298180696886, "grad_norm": 3.954350233078003, "learning_rate": 2.1387689351723266e-06, "loss": 0.952, "step": 9121 }, { "epoch": 0.7032069071847055, "grad_norm": 3.8101086616516113, "learning_rate": 2.1377451954224526e-06, "loss": 0.94, "step": 9122 }, { "epoch": 0.7032839962997225, "grad_norm": 4.078210353851318, "learning_rate": 2.13672163411934e-06, "loss": 0.9399, "step": 9123 }, { "epoch": 0.7033610854147394, "grad_norm": 3.5562384128570557, "learning_rate": 2.1356982513268034e-06, "loss": 0.806, "step": 9124 }, { "epoch": 0.7034381745297564, "grad_norm": 3.3538687229156494, "learning_rate": 2.134675047108647e-06, "loss": 0.8706, "step": 9125 }, { "epoch": 0.7035152636447733, "grad_norm": 4.081210136413574, "learning_rate": 2.133652021528661e-06, "loss": 0.8758, "step": 9126 }, { "epoch": 0.7035923527597903, "grad_norm": 4.063713550567627, "learning_rate": 2.1326291746506283e-06, "loss": 1.0671, "step": 9127 }, { "epoch": 0.7036694418748073, "grad_norm": 3.4613239765167236, "learning_rate": 2.131606506538314e-06, "loss": 0.8831, "step": 9128 }, { "epoch": 0.7037465309898242, "grad_norm": 3.84372878074646, "learning_rate": 2.1305840172554786e-06, "loss": 1.0199, "step": 9129 }, { "epoch": 0.7038236201048412, "grad_norm": 
3.5245513916015625, "learning_rate": 2.1295617068658685e-06, "loss": 0.8048, "step": 9130 }, { "epoch": 0.7039007092198581, "grad_norm": 3.294621706008911, "learning_rate": 2.1285395754332198e-06, "loss": 0.8333, "step": 9131 }, { "epoch": 0.7039777983348752, "grad_norm": 3.9054811000823975, "learning_rate": 2.1275176230212568e-06, "loss": 0.8569, "step": 9132 }, { "epoch": 0.7040548874498921, "grad_norm": 3.4768083095550537, "learning_rate": 2.1264958496936945e-06, "loss": 0.8138, "step": 9133 }, { "epoch": 0.704131976564909, "grad_norm": 3.709878921508789, "learning_rate": 2.125474255514232e-06, "loss": 0.925, "step": 9134 }, { "epoch": 0.704209065679926, "grad_norm": 3.6700382232666016, "learning_rate": 2.1244528405465626e-06, "loss": 0.9451, "step": 9135 }, { "epoch": 0.7042861547949429, "grad_norm": 3.370394468307495, "learning_rate": 2.123431604854365e-06, "loss": 0.8803, "step": 9136 }, { "epoch": 0.70436324390996, "grad_norm": 3.7478532791137695, "learning_rate": 2.1224105485013096e-06, "loss": 0.8755, "step": 9137 }, { "epoch": 0.7044403330249769, "grad_norm": 3.732043981552124, "learning_rate": 2.121389671551054e-06, "loss": 0.9241, "step": 9138 }, { "epoch": 0.7045174221399938, "grad_norm": 3.701848268508911, "learning_rate": 2.120368974067242e-06, "loss": 0.9204, "step": 9139 }, { "epoch": 0.7045945112550108, "grad_norm": 3.8429481983184814, "learning_rate": 2.11934845611351e-06, "loss": 1.0072, "step": 9140 }, { "epoch": 0.7046716003700277, "grad_norm": 3.9448812007904053, "learning_rate": 2.1183281177534828e-06, "loss": 0.945, "step": 9141 }, { "epoch": 0.7047486894850448, "grad_norm": 3.5992908477783203, "learning_rate": 2.1173079590507735e-06, "loss": 0.9431, "step": 9142 }, { "epoch": 0.7048257786000617, "grad_norm": 3.7120702266693115, "learning_rate": 2.1162879800689845e-06, "loss": 0.7797, "step": 9143 }, { "epoch": 0.7049028677150786, "grad_norm": 3.8240060806274414, "learning_rate": 2.1152681808717033e-06, "loss": 0.9258, "step": 9144 }, { "epoch": 0.7049799568300956, "grad_norm": 3.821179151535034, "learning_rate": 2.1142485615225104e-06, "loss": 0.869, "step": 9145 }, { "epoch": 0.7050570459451125, "grad_norm": 4.0263142585754395, "learning_rate": 2.1132291220849743e-06, "loss": 0.9052, "step": 9146 }, { "epoch": 0.7051341350601296, "grad_norm": 3.7679524421691895, "learning_rate": 2.1122098626226512e-06, "loss": 1.0384, "step": 9147 }, { "epoch": 0.7052112241751465, "grad_norm": 3.6580440998077393, "learning_rate": 2.111190783199089e-06, "loss": 0.9052, "step": 9148 }, { "epoch": 0.7052883132901634, "grad_norm": 3.7239935398101807, "learning_rate": 2.110171883877818e-06, "loss": 0.9718, "step": 9149 }, { "epoch": 0.7053654024051804, "grad_norm": 4.641191482543945, "learning_rate": 2.1091531647223632e-06, "loss": 1.015, "step": 9150 }, { "epoch": 0.7054424915201973, "grad_norm": 3.7391200065612793, "learning_rate": 2.1081346257962367e-06, "loss": 0.9439, "step": 9151 }, { "epoch": 0.7055195806352144, "grad_norm": 3.7103328704833984, "learning_rate": 2.1071162671629386e-06, "loss": 0.9708, "step": 9152 }, { "epoch": 0.7055966697502313, "grad_norm": 3.7474148273468018, "learning_rate": 2.1060980888859608e-06, "loss": 0.9439, "step": 9153 }, { "epoch": 0.7056737588652482, "grad_norm": 3.799514055252075, "learning_rate": 2.1050800910287774e-06, "loss": 0.961, "step": 9154 }, { "epoch": 0.7057508479802652, "grad_norm": 3.7274961471557617, "learning_rate": 2.1040622736548566e-06, "loss": 0.871, "step": 9155 }, { "epoch": 0.7058279370952821, "grad_norm": 
4.0381669998168945, "learning_rate": 2.1030446368276547e-06, "loss": 0.8929, "step": 9156 }, { "epoch": 0.7059050262102992, "grad_norm": 3.871047019958496, "learning_rate": 2.102027180610616e-06, "loss": 0.9771, "step": 9157 }, { "epoch": 0.7059821153253161, "grad_norm": 3.7191429138183594, "learning_rate": 2.1010099050671745e-06, "loss": 0.9002, "step": 9158 }, { "epoch": 0.706059204440333, "grad_norm": 3.723458766937256, "learning_rate": 2.099992810260752e-06, "loss": 0.9915, "step": 9159 }, { "epoch": 0.70613629355535, "grad_norm": 4.22058629989624, "learning_rate": 2.098975896254757e-06, "loss": 1.0003, "step": 9160 }, { "epoch": 0.7062133826703669, "grad_norm": 3.7381343841552734, "learning_rate": 2.0979591631125896e-06, "loss": 0.9054, "step": 9161 }, { "epoch": 0.706290471785384, "grad_norm": 3.518643379211426, "learning_rate": 2.096942610897639e-06, "loss": 0.9081, "step": 9162 }, { "epoch": 0.7063675609004009, "grad_norm": 3.9908714294433594, "learning_rate": 2.0959262396732804e-06, "loss": 0.9465, "step": 9163 }, { "epoch": 0.7064446500154178, "grad_norm": 3.8830153942108154, "learning_rate": 2.094910049502882e-06, "loss": 0.8796, "step": 9164 }, { "epoch": 0.7065217391304348, "grad_norm": 3.6630706787109375, "learning_rate": 2.093894040449795e-06, "loss": 0.8835, "step": 9165 }, { "epoch": 0.7065988282454517, "grad_norm": 3.9855260848999023, "learning_rate": 2.0928782125773636e-06, "loss": 1.0637, "step": 9166 }, { "epoch": 0.7066759173604688, "grad_norm": 3.887150764465332, "learning_rate": 2.091862565948919e-06, "loss": 0.9717, "step": 9167 }, { "epoch": 0.7067530064754857, "grad_norm": 3.4647603034973145, "learning_rate": 2.0908471006277816e-06, "loss": 0.92, "step": 9168 }, { "epoch": 0.7068300955905026, "grad_norm": 3.9277379512786865, "learning_rate": 2.089831816677261e-06, "loss": 0.9405, "step": 9169 }, { "epoch": 0.7069071847055196, "grad_norm": 3.964707136154175, "learning_rate": 2.088816714160656e-06, "loss": 1.0286, "step": 9170 }, { "epoch": 0.7069842738205365, "grad_norm": 3.6983249187469482, "learning_rate": 2.0878017931412493e-06, "loss": 1.0521, "step": 9171 }, { "epoch": 0.7070613629355536, "grad_norm": 3.797630548477173, "learning_rate": 2.0867870536823185e-06, "loss": 0.9313, "step": 9172 }, { "epoch": 0.7071384520505705, "grad_norm": 3.392491340637207, "learning_rate": 2.0857724958471273e-06, "loss": 0.742, "step": 9173 }, { "epoch": 0.7072155411655874, "grad_norm": 3.4460275173187256, "learning_rate": 2.0847581196989277e-06, "loss": 0.8896, "step": 9174 }, { "epoch": 0.7072926302806044, "grad_norm": 4.643573760986328, "learning_rate": 2.0837439253009623e-06, "loss": 0.9268, "step": 9175 }, { "epoch": 0.7073697193956213, "grad_norm": 3.4909539222717285, "learning_rate": 2.0827299127164574e-06, "loss": 0.9444, "step": 9176 }, { "epoch": 0.7074468085106383, "grad_norm": 3.7072372436523438, "learning_rate": 2.0817160820086342e-06, "loss": 0.9454, "step": 9177 }, { "epoch": 0.7075238976256553, "grad_norm": 3.618695020675659, "learning_rate": 2.080702433240699e-06, "loss": 0.9146, "step": 9178 }, { "epoch": 0.7076009867406722, "grad_norm": 3.594953775405884, "learning_rate": 2.079688966475847e-06, "loss": 0.9275, "step": 9179 }, { "epoch": 0.7076780758556892, "grad_norm": 3.649266004562378, "learning_rate": 2.078675681777264e-06, "loss": 0.8821, "step": 9180 }, { "epoch": 0.7077551649707061, "grad_norm": 3.623678207397461, "learning_rate": 2.077662579208124e-06, "loss": 0.9072, "step": 9181 }, { "epoch": 0.7078322540857231, "grad_norm": 3.6807639598846436, 
"learning_rate": 2.0766496588315853e-06, "loss": 0.9662, "step": 9182 }, { "epoch": 0.7079093432007401, "grad_norm": 3.5517539978027344, "learning_rate": 2.0756369207107997e-06, "loss": 0.8944, "step": 9183 }, { "epoch": 0.707986432315757, "grad_norm": 3.9906022548675537, "learning_rate": 2.0746243649089065e-06, "loss": 0.8717, "step": 9184 }, { "epoch": 0.708063521430774, "grad_norm": 3.7278897762298584, "learning_rate": 2.0736119914890335e-06, "loss": 0.9515, "step": 9185 }, { "epoch": 0.7081406105457909, "grad_norm": 4.302172660827637, "learning_rate": 2.072599800514296e-06, "loss": 0.9035, "step": 9186 }, { "epoch": 0.7082176996608079, "grad_norm": 4.022290229797363, "learning_rate": 2.0715877920478e-06, "loss": 0.8397, "step": 9187 }, { "epoch": 0.7082947887758249, "grad_norm": 3.2679741382598877, "learning_rate": 2.0705759661526387e-06, "loss": 0.7864, "step": 9188 }, { "epoch": 0.7083718778908418, "grad_norm": 3.836318254470825, "learning_rate": 2.069564322891894e-06, "loss": 0.9187, "step": 9189 }, { "epoch": 0.7084489670058588, "grad_norm": 4.069839000701904, "learning_rate": 2.068552862328637e-06, "loss": 1.0155, "step": 9190 }, { "epoch": 0.7085260561208757, "grad_norm": 3.7692060470581055, "learning_rate": 2.067541584525927e-06, "loss": 0.8143, "step": 9191 }, { "epoch": 0.7086031452358927, "grad_norm": 3.5922646522521973, "learning_rate": 2.0665304895468114e-06, "loss": 0.8584, "step": 9192 }, { "epoch": 0.7086802343509097, "grad_norm": 3.649376630783081, "learning_rate": 2.065519577454326e-06, "loss": 0.9411, "step": 9193 }, { "epoch": 0.7087573234659266, "grad_norm": 3.5520145893096924, "learning_rate": 2.0645088483114974e-06, "loss": 0.8421, "step": 9194 }, { "epoch": 0.7088344125809436, "grad_norm": 4.203880310058594, "learning_rate": 2.0634983021813385e-06, "loss": 0.9849, "step": 9195 }, { "epoch": 0.7089115016959605, "grad_norm": 3.6634862422943115, "learning_rate": 2.062487939126854e-06, "loss": 0.9275, "step": 9196 }, { "epoch": 0.7089885908109775, "grad_norm": 3.71562123298645, "learning_rate": 2.0614777592110306e-06, "loss": 0.8916, "step": 9197 }, { "epoch": 0.7090656799259945, "grad_norm": 3.9342494010925293, "learning_rate": 2.06046776249685e-06, "loss": 0.8933, "step": 9198 }, { "epoch": 0.7091427690410114, "grad_norm": 3.345992088317871, "learning_rate": 2.0594579490472803e-06, "loss": 0.8077, "step": 9199 }, { "epoch": 0.7092198581560284, "grad_norm": 3.8320610523223877, "learning_rate": 2.058448318925278e-06, "loss": 0.9362, "step": 9200 }, { "epoch": 0.7092969472710453, "grad_norm": 3.6400604248046875, "learning_rate": 2.0574388721937905e-06, "loss": 0.8823, "step": 9201 }, { "epoch": 0.7093740363860623, "grad_norm": 3.9985427856445312, "learning_rate": 2.056429608915747e-06, "loss": 1.0019, "step": 9202 }, { "epoch": 0.7094511255010792, "grad_norm": 3.3883984088897705, "learning_rate": 2.0554205291540724e-06, "loss": 0.8021, "step": 9203 }, { "epoch": 0.7095282146160962, "grad_norm": 3.7378809452056885, "learning_rate": 2.0544116329716773e-06, "loss": 0.8293, "step": 9204 }, { "epoch": 0.7096053037311132, "grad_norm": 3.601414918899536, "learning_rate": 2.0534029204314613e-06, "loss": 0.8926, "step": 9205 }, { "epoch": 0.7096823928461301, "grad_norm": 3.2823150157928467, "learning_rate": 2.052394391596313e-06, "loss": 0.7918, "step": 9206 }, { "epoch": 0.7097594819611471, "grad_norm": 4.111335277557373, "learning_rate": 2.0513860465291097e-06, "loss": 0.8922, "step": 9207 }, { "epoch": 0.709836571076164, "grad_norm": 3.9760074615478516, "learning_rate": 
2.0503778852927134e-06, "loss": 0.9042, "step": 9208 }, { "epoch": 0.709913660191181, "grad_norm": 3.659054756164551, "learning_rate": 2.0493699079499797e-06, "loss": 0.8971, "step": 9209 }, { "epoch": 0.709990749306198, "grad_norm": 3.8829121589660645, "learning_rate": 2.04836211456375e-06, "loss": 0.8574, "step": 9210 }, { "epoch": 0.7100678384212149, "grad_norm": 3.769808292388916, "learning_rate": 2.0473545051968557e-06, "loss": 0.777, "step": 9211 }, { "epoch": 0.7101449275362319, "grad_norm": 3.7595462799072266, "learning_rate": 2.0463470799121177e-06, "loss": 0.8837, "step": 9212 }, { "epoch": 0.7102220166512488, "grad_norm": 3.825510025024414, "learning_rate": 2.04533983877234e-06, "loss": 0.9327, "step": 9213 }, { "epoch": 0.7102991057662658, "grad_norm": 3.5481746196746826, "learning_rate": 2.0443327818403213e-06, "loss": 0.9789, "step": 9214 }, { "epoch": 0.7103761948812828, "grad_norm": 3.591843366622925, "learning_rate": 2.0433259091788453e-06, "loss": 0.9067, "step": 9215 }, { "epoch": 0.7104532839962997, "grad_norm": 4.088891983032227, "learning_rate": 2.042319220850686e-06, "loss": 0.9353, "step": 9216 }, { "epoch": 0.7105303731113167, "grad_norm": 3.6502561569213867, "learning_rate": 2.0413127169186053e-06, "loss": 0.9367, "step": 9217 }, { "epoch": 0.7106074622263336, "grad_norm": 3.281320810317993, "learning_rate": 2.0403063974453547e-06, "loss": 0.8194, "step": 9218 }, { "epoch": 0.7106845513413506, "grad_norm": 3.4343175888061523, "learning_rate": 2.03930026249367e-06, "loss": 0.7826, "step": 9219 }, { "epoch": 0.7107616404563676, "grad_norm": 3.537015438079834, "learning_rate": 2.03829431212628e-06, "loss": 0.9891, "step": 9220 }, { "epoch": 0.7108387295713845, "grad_norm": 3.6004459857940674, "learning_rate": 2.0372885464059004e-06, "loss": 0.9879, "step": 9221 }, { "epoch": 0.7109158186864015, "grad_norm": 3.736265182495117, "learning_rate": 2.036282965395236e-06, "loss": 0.8828, "step": 9222 }, { "epoch": 0.7109929078014184, "grad_norm": 3.9425337314605713, "learning_rate": 2.035277569156981e-06, "loss": 1.03, "step": 9223 }, { "epoch": 0.7110699969164354, "grad_norm": 3.6420586109161377, "learning_rate": 2.0342723577538125e-06, "loss": 1.0069, "step": 9224 }, { "epoch": 0.7111470860314524, "grad_norm": 3.814761161804199, "learning_rate": 2.0332673312484037e-06, "loss": 1.0226, "step": 9225 }, { "epoch": 0.7112241751464693, "grad_norm": 3.7190582752227783, "learning_rate": 2.0322624897034102e-06, "loss": 0.8863, "step": 9226 }, { "epoch": 0.7113012642614863, "grad_norm": 3.9217827320098877, "learning_rate": 2.0312578331814812e-06, "loss": 0.9611, "step": 9227 }, { "epoch": 0.7113783533765032, "grad_norm": 3.8105664253234863, "learning_rate": 2.030253361745251e-06, "loss": 0.9657, "step": 9228 }, { "epoch": 0.7114554424915202, "grad_norm": 3.696140766143799, "learning_rate": 2.0292490754573425e-06, "loss": 0.9549, "step": 9229 }, { "epoch": 0.7115325316065372, "grad_norm": 3.7637147903442383, "learning_rate": 2.0282449743803684e-06, "loss": 0.9717, "step": 9230 }, { "epoch": 0.7116096207215541, "grad_norm": 3.541837692260742, "learning_rate": 2.0272410585769284e-06, "loss": 0.8398, "step": 9231 }, { "epoch": 0.7116867098365711, "grad_norm": 3.7558746337890625, "learning_rate": 2.0262373281096133e-06, "loss": 0.9397, "step": 9232 }, { "epoch": 0.711763798951588, "grad_norm": 3.951739549636841, "learning_rate": 2.025233783041e-06, "loss": 1.0248, "step": 9233 }, { "epoch": 0.711840888066605, "grad_norm": 3.583740472793579, "learning_rate": 2.0242304234336525e-06, 
"loss": 0.9207, "step": 9234 }, { "epoch": 0.711917977181622, "grad_norm": 3.8319308757781982, "learning_rate": 2.0232272493501253e-06, "loss": 0.9674, "step": 9235 }, { "epoch": 0.7119950662966389, "grad_norm": 3.4977939128875732, "learning_rate": 2.022224260852963e-06, "loss": 0.8959, "step": 9236 }, { "epoch": 0.7120721554116559, "grad_norm": 3.4424352645874023, "learning_rate": 2.021221458004695e-06, "loss": 0.8286, "step": 9237 }, { "epoch": 0.7121492445266728, "grad_norm": 3.8004579544067383, "learning_rate": 2.020218840867842e-06, "loss": 1.0063, "step": 9238 }, { "epoch": 0.7122263336416897, "grad_norm": 3.6114730834960938, "learning_rate": 2.019216409504913e-06, "loss": 0.8109, "step": 9239 }, { "epoch": 0.7123034227567068, "grad_norm": 3.892796516418457, "learning_rate": 2.018214163978402e-06, "loss": 1.0058, "step": 9240 }, { "epoch": 0.7123805118717237, "grad_norm": 3.875223159790039, "learning_rate": 2.0172121043507943e-06, "loss": 0.9591, "step": 9241 }, { "epoch": 0.7124576009867407, "grad_norm": 4.0388665199279785, "learning_rate": 2.016210230684564e-06, "loss": 0.9484, "step": 9242 }, { "epoch": 0.7125346901017576, "grad_norm": 3.8614680767059326, "learning_rate": 2.015208543042172e-06, "loss": 1.0, "step": 9243 }, { "epoch": 0.7126117792167745, "grad_norm": 3.793128252029419, "learning_rate": 2.0142070414860704e-06, "loss": 0.9818, "step": 9244 }, { "epoch": 0.7126888683317916, "grad_norm": 3.489015817642212, "learning_rate": 2.0132057260786943e-06, "loss": 0.9183, "step": 9245 }, { "epoch": 0.7127659574468085, "grad_norm": 3.5036089420318604, "learning_rate": 2.012204596882472e-06, "loss": 0.9126, "step": 9246 }, { "epoch": 0.7128430465618255, "grad_norm": 3.7784531116485596, "learning_rate": 2.011203653959819e-06, "loss": 0.902, "step": 9247 }, { "epoch": 0.7129201356768424, "grad_norm": 3.950711488723755, "learning_rate": 2.0102028973731393e-06, "loss": 0.9668, "step": 9248 }, { "epoch": 0.7129972247918593, "grad_norm": 3.538501501083374, "learning_rate": 2.0092023271848254e-06, "loss": 0.9317, "step": 9249 }, { "epoch": 0.7130743139068764, "grad_norm": 3.5862019062042236, "learning_rate": 2.008201943457255e-06, "loss": 0.8727, "step": 9250 }, { "epoch": 0.7131514030218933, "grad_norm": 3.7320566177368164, "learning_rate": 2.007201746252799e-06, "loss": 1.0035, "step": 9251 }, { "epoch": 0.7132284921369103, "grad_norm": 3.5628163814544678, "learning_rate": 2.0062017356338136e-06, "loss": 0.8501, "step": 9252 }, { "epoch": 0.7133055812519272, "grad_norm": 3.4357900619506836, "learning_rate": 2.0052019116626446e-06, "loss": 0.867, "step": 9253 }, { "epoch": 0.7133826703669441, "grad_norm": 3.7662131786346436, "learning_rate": 2.0042022744016264e-06, "loss": 1.0013, "step": 9254 }, { "epoch": 0.7134597594819612, "grad_norm": 3.847144842147827, "learning_rate": 2.0032028239130824e-06, "loss": 1.0047, "step": 9255 }, { "epoch": 0.7135368485969781, "grad_norm": 3.619068145751953, "learning_rate": 2.00220356025932e-06, "loss": 0.8234, "step": 9256 }, { "epoch": 0.7136139377119951, "grad_norm": 3.6342787742614746, "learning_rate": 2.001204483502639e-06, "loss": 0.8697, "step": 9257 }, { "epoch": 0.713691026827012, "grad_norm": 3.920424222946167, "learning_rate": 2.000205593705328e-06, "loss": 1.0292, "step": 9258 }, { "epoch": 0.7137681159420289, "grad_norm": 3.7946994304656982, "learning_rate": 1.9992068909296607e-06, "loss": 0.8509, "step": 9259 }, { "epoch": 0.713845205057046, "grad_norm": 4.217837810516357, "learning_rate": 1.998208375237905e-06, "loss": 0.9054, "step": 
9260 }, { "epoch": 0.7139222941720629, "grad_norm": 4.02250862121582, "learning_rate": 1.9972100466923083e-06, "loss": 0.8969, "step": 9261 }, { "epoch": 0.7139993832870799, "grad_norm": 3.8427062034606934, "learning_rate": 1.996211905355114e-06, "loss": 0.8616, "step": 9262 }, { "epoch": 0.7140764724020968, "grad_norm": 3.860753059387207, "learning_rate": 1.9952139512885497e-06, "loss": 1.0435, "step": 9263 }, { "epoch": 0.7141535615171137, "grad_norm": 3.5810599327087402, "learning_rate": 1.9942161845548334e-06, "loss": 0.8754, "step": 9264 }, { "epoch": 0.7142306506321308, "grad_norm": 3.545610189437866, "learning_rate": 1.993218605216171e-06, "loss": 0.8673, "step": 9265 }, { "epoch": 0.7143077397471477, "grad_norm": 3.7623043060302734, "learning_rate": 1.9922212133347575e-06, "loss": 1.0008, "step": 9266 }, { "epoch": 0.7143848288621647, "grad_norm": 3.5907156467437744, "learning_rate": 1.991224008972772e-06, "loss": 0.9327, "step": 9267 }, { "epoch": 0.7144619179771816, "grad_norm": 3.638535261154175, "learning_rate": 1.9902269921923867e-06, "loss": 0.924, "step": 9268 }, { "epoch": 0.7145390070921985, "grad_norm": 3.881423234939575, "learning_rate": 1.9892301630557604e-06, "loss": 0.9654, "step": 9269 }, { "epoch": 0.7146160962072156, "grad_norm": 3.544396162033081, "learning_rate": 1.9882335216250402e-06, "loss": 0.8691, "step": 9270 }, { "epoch": 0.7146931853222325, "grad_norm": 3.863416910171509, "learning_rate": 1.987237067962363e-06, "loss": 0.8315, "step": 9271 }, { "epoch": 0.7147702744372495, "grad_norm": 3.7197678089141846, "learning_rate": 1.9862408021298503e-06, "loss": 0.9305, "step": 9272 }, { "epoch": 0.7148473635522664, "grad_norm": 3.4291746616363525, "learning_rate": 1.9852447241896122e-06, "loss": 0.8485, "step": 9273 }, { "epoch": 0.7149244526672833, "grad_norm": 3.7952208518981934, "learning_rate": 1.984248834203754e-06, "loss": 0.8727, "step": 9274 }, { "epoch": 0.7150015417823004, "grad_norm": 3.9852194786071777, "learning_rate": 1.9832531322343617e-06, "loss": 0.9672, "step": 9275 }, { "epoch": 0.7150786308973173, "grad_norm": 3.7083189487457275, "learning_rate": 1.982257618343515e-06, "loss": 0.8642, "step": 9276 }, { "epoch": 0.7151557200123343, "grad_norm": 3.6518466472625732, "learning_rate": 1.981262292593274e-06, "loss": 0.8827, "step": 9277 }, { "epoch": 0.7152328091273512, "grad_norm": 3.622223138809204, "learning_rate": 1.9802671550456948e-06, "loss": 0.8975, "step": 9278 }, { "epoch": 0.7153098982423681, "grad_norm": 3.709808349609375, "learning_rate": 1.979272205762819e-06, "loss": 0.9319, "step": 9279 }, { "epoch": 0.7153869873573852, "grad_norm": 3.7233026027679443, "learning_rate": 1.978277444806676e-06, "loss": 0.855, "step": 9280 }, { "epoch": 0.7154640764724021, "grad_norm": 3.8098905086517334, "learning_rate": 1.9772828722392866e-06, "loss": 0.895, "step": 9281 }, { "epoch": 0.7155411655874191, "grad_norm": 3.943906545639038, "learning_rate": 1.9762884881226535e-06, "loss": 0.8825, "step": 9282 }, { "epoch": 0.715618254702436, "grad_norm": 3.8374009132385254, "learning_rate": 1.9752942925187725e-06, "loss": 0.7061, "step": 9283 }, { "epoch": 0.7156953438174529, "grad_norm": 3.700810194015503, "learning_rate": 1.974300285489627e-06, "loss": 0.9142, "step": 9284 }, { "epoch": 0.71577243293247, "grad_norm": 3.885406494140625, "learning_rate": 1.9733064670971886e-06, "loss": 0.9404, "step": 9285 }, { "epoch": 0.7158495220474869, "grad_norm": 3.8215103149414062, "learning_rate": 1.972312837403416e-06, "loss": 0.9591, "step": 9286 }, { "epoch": 
0.7159266111625039, "grad_norm": 3.734654426574707, "learning_rate": 1.971319396470259e-06, "loss": 0.9052, "step": 9287 }, { "epoch": 0.7160037002775208, "grad_norm": 3.6638259887695312, "learning_rate": 1.97032614435965e-06, "loss": 1.0389, "step": 9288 }, { "epoch": 0.7160807893925377, "grad_norm": 3.528590440750122, "learning_rate": 1.969333081133515e-06, "loss": 0.8955, "step": 9289 }, { "epoch": 0.7161578785075547, "grad_norm": 3.670738458633423, "learning_rate": 1.9683402068537654e-06, "loss": 1.0466, "step": 9290 }, { "epoch": 0.7162349676225717, "grad_norm": 3.703134775161743, "learning_rate": 1.9673475215823035e-06, "loss": 0.9594, "step": 9291 }, { "epoch": 0.7163120567375887, "grad_norm": 4.054429054260254, "learning_rate": 1.966355025381018e-06, "loss": 0.9606, "step": 9292 }, { "epoch": 0.7163891458526056, "grad_norm": 3.620304584503174, "learning_rate": 1.965362718311784e-06, "loss": 0.8857, "step": 9293 }, { "epoch": 0.7164662349676225, "grad_norm": 3.4398069381713867, "learning_rate": 1.9643706004364675e-06, "loss": 0.7762, "step": 9294 }, { "epoch": 0.7165433240826395, "grad_norm": 3.881047487258911, "learning_rate": 1.9633786718169217e-06, "loss": 0.9335, "step": 9295 }, { "epoch": 0.7166204131976565, "grad_norm": 3.718986749649048, "learning_rate": 1.9623869325149893e-06, "loss": 0.8663, "step": 9296 }, { "epoch": 0.7166975023126735, "grad_norm": 3.492274522781372, "learning_rate": 1.9613953825925008e-06, "loss": 0.9198, "step": 9297 }, { "epoch": 0.7167745914276904, "grad_norm": 3.6691811084747314, "learning_rate": 1.960404022111271e-06, "loss": 0.8376, "step": 9298 }, { "epoch": 0.7168516805427073, "grad_norm": 3.32060170173645, "learning_rate": 1.9594128511331082e-06, "loss": 0.7495, "step": 9299 }, { "epoch": 0.7169287696577243, "grad_norm": 3.6289753913879395, "learning_rate": 1.958421869719807e-06, "loss": 0.9179, "step": 9300 }, { "epoch": 0.7170058587727413, "grad_norm": 3.9919261932373047, "learning_rate": 1.957431077933149e-06, "loss": 0.8687, "step": 9301 }, { "epoch": 0.7170829478877583, "grad_norm": 4.032829761505127, "learning_rate": 1.9564404758349055e-06, "loss": 0.9729, "step": 9302 }, { "epoch": 0.7171600370027752, "grad_norm": 3.717048406600952, "learning_rate": 1.955450063486837e-06, "loss": 0.9274, "step": 9303 }, { "epoch": 0.7172371261177922, "grad_norm": 3.426722288131714, "learning_rate": 1.954459840950687e-06, "loss": 0.8553, "step": 9304 }, { "epoch": 0.7173142152328091, "grad_norm": 3.5215723514556885, "learning_rate": 1.9534698082881926e-06, "loss": 0.9, "step": 9305 }, { "epoch": 0.717391304347826, "grad_norm": 3.6140124797821045, "learning_rate": 1.9524799655610776e-06, "loss": 0.8674, "step": 9306 }, { "epoch": 0.7174683934628431, "grad_norm": 3.1654090881347656, "learning_rate": 1.951490312831053e-06, "loss": 0.7923, "step": 9307 }, { "epoch": 0.71754548257786, "grad_norm": 3.4995226860046387, "learning_rate": 1.9505008501598204e-06, "loss": 0.9468, "step": 9308 }, { "epoch": 0.717622571692877, "grad_norm": 3.9290666580200195, "learning_rate": 1.949511577609065e-06, "loss": 0.8838, "step": 9309 }, { "epoch": 0.7176996608078939, "grad_norm": 4.191568851470947, "learning_rate": 1.948522495240463e-06, "loss": 0.8792, "step": 9310 }, { "epoch": 0.7177767499229109, "grad_norm": 4.151352882385254, "learning_rate": 1.94753360311568e-06, "loss": 0.9023, "step": 9311 }, { "epoch": 0.7178538390379279, "grad_norm": 3.6860830783843994, "learning_rate": 1.946544901296367e-06, "loss": 0.9006, "step": 9312 }, { "epoch": 0.7179309281529448, "grad_norm": 
3.961071491241455, "learning_rate": 1.945556389844166e-06, "loss": 0.8171, "step": 9313 }, { "epoch": 0.7180080172679618, "grad_norm": 3.5512242317199707, "learning_rate": 1.9445680688207065e-06, "loss": 1.0231, "step": 9314 }, { "epoch": 0.7180851063829787, "grad_norm": 4.143185615539551, "learning_rate": 1.943579938287601e-06, "loss": 0.9645, "step": 9315 }, { "epoch": 0.7181621954979956, "grad_norm": 3.999950885772705, "learning_rate": 1.942591998306457e-06, "loss": 0.9124, "step": 9316 }, { "epoch": 0.7182392846130127, "grad_norm": 3.6654207706451416, "learning_rate": 1.941604248938867e-06, "loss": 0.8595, "step": 9317 }, { "epoch": 0.7183163737280296, "grad_norm": 4.063483238220215, "learning_rate": 1.9406166902464128e-06, "loss": 0.9909, "step": 9318 }, { "epoch": 0.7183934628430466, "grad_norm": 3.7724883556365967, "learning_rate": 1.9396293222906626e-06, "loss": 0.9458, "step": 9319 }, { "epoch": 0.7184705519580635, "grad_norm": 3.940067768096924, "learning_rate": 1.9386421451331737e-06, "loss": 0.9825, "step": 9320 }, { "epoch": 0.7185476410730804, "grad_norm": 3.5647549629211426, "learning_rate": 1.9376551588354924e-06, "loss": 0.9541, "step": 9321 }, { "epoch": 0.7186247301880975, "grad_norm": 3.541923761367798, "learning_rate": 1.936668363459152e-06, "loss": 0.9573, "step": 9322 }, { "epoch": 0.7187018193031144, "grad_norm": 3.8034353256225586, "learning_rate": 1.9356817590656734e-06, "loss": 0.7969, "step": 9323 }, { "epoch": 0.7187789084181314, "grad_norm": 3.5387001037597656, "learning_rate": 1.934695345716568e-06, "loss": 0.8029, "step": 9324 }, { "epoch": 0.7188559975331483, "grad_norm": 3.763958215713501, "learning_rate": 1.933709123473331e-06, "loss": 0.8748, "step": 9325 }, { "epoch": 0.7189330866481652, "grad_norm": 3.617594003677368, "learning_rate": 1.9327230923974487e-06, "loss": 0.8477, "step": 9326 }, { "epoch": 0.7190101757631823, "grad_norm": 3.5570931434631348, "learning_rate": 1.931737252550396e-06, "loss": 0.8856, "step": 9327 }, { "epoch": 0.7190872648781992, "grad_norm": 3.4663596153259277, "learning_rate": 1.9307516039936354e-06, "loss": 0.874, "step": 9328 }, { "epoch": 0.7191643539932162, "grad_norm": 3.960660696029663, "learning_rate": 1.9297661467886177e-06, "loss": 0.9692, "step": 9329 }, { "epoch": 0.7192414431082331, "grad_norm": 3.4713823795318604, "learning_rate": 1.928780880996777e-06, "loss": 0.8727, "step": 9330 }, { "epoch": 0.71931853222325, "grad_norm": 3.68634295463562, "learning_rate": 1.9277958066795426e-06, "loss": 0.8905, "step": 9331 }, { "epoch": 0.7193956213382671, "grad_norm": 3.8657889366149902, "learning_rate": 1.9268109238983287e-06, "loss": 0.9138, "step": 9332 }, { "epoch": 0.719472710453284, "grad_norm": 3.750303268432617, "learning_rate": 1.925826232714537e-06, "loss": 0.9105, "step": 9333 }, { "epoch": 0.719549799568301, "grad_norm": 3.9123423099517822, "learning_rate": 1.924841733189558e-06, "loss": 0.8826, "step": 9334 }, { "epoch": 0.7196268886833179, "grad_norm": 3.7364614009857178, "learning_rate": 1.923857425384772e-06, "loss": 0.9367, "step": 9335 }, { "epoch": 0.7197039777983348, "grad_norm": 4.046642303466797, "learning_rate": 1.922873309361542e-06, "loss": 1.0428, "step": 9336 }, { "epoch": 0.7197810669133519, "grad_norm": 3.7967915534973145, "learning_rate": 1.921889385181225e-06, "loss": 0.9271, "step": 9337 }, { "epoch": 0.7198581560283688, "grad_norm": 4.126679420471191, "learning_rate": 1.9209056529051617e-06, "loss": 0.9439, "step": 9338 }, { "epoch": 0.7199352451433858, "grad_norm": 3.514425039291382, 
"learning_rate": 1.9199221125946847e-06, "loss": 0.8919, "step": 9339 }, { "epoch": 0.7200123342584027, "grad_norm": 3.4573843479156494, "learning_rate": 1.9189387643111135e-06, "loss": 0.8665, "step": 9340 }, { "epoch": 0.7200894233734196, "grad_norm": 4.3528242111206055, "learning_rate": 1.9179556081157513e-06, "loss": 0.9908, "step": 9341 }, { "epoch": 0.7201665124884367, "grad_norm": 3.473971366882324, "learning_rate": 1.9169726440698945e-06, "loss": 0.8575, "step": 9342 }, { "epoch": 0.7202436016034536, "grad_norm": 3.958357334136963, "learning_rate": 1.9159898722348264e-06, "loss": 0.9238, "step": 9343 }, { "epoch": 0.7203206907184706, "grad_norm": 3.5286219120025635, "learning_rate": 1.9150072926718166e-06, "loss": 0.9294, "step": 9344 }, { "epoch": 0.7203977798334875, "grad_norm": 3.8079938888549805, "learning_rate": 1.914024905442127e-06, "loss": 0.9186, "step": 9345 }, { "epoch": 0.7204748689485044, "grad_norm": 3.7631888389587402, "learning_rate": 1.9130427106069993e-06, "loss": 0.9976, "step": 9346 }, { "epoch": 0.7205519580635215, "grad_norm": 3.6607329845428467, "learning_rate": 1.912060708227671e-06, "loss": 0.9171, "step": 9347 }, { "epoch": 0.7206290471785384, "grad_norm": 3.550433874130249, "learning_rate": 1.911078898365365e-06, "loss": 0.8487, "step": 9348 }, { "epoch": 0.7207061362935554, "grad_norm": 3.631561040878296, "learning_rate": 1.9100972810812918e-06, "loss": 0.9102, "step": 9349 }, { "epoch": 0.7207832254085723, "grad_norm": 4.037540435791016, "learning_rate": 1.90911585643665e-06, "loss": 0.9158, "step": 9350 }, { "epoch": 0.7208603145235892, "grad_norm": 4.127115249633789, "learning_rate": 1.908134624492628e-06, "loss": 0.9296, "step": 9351 }, { "epoch": 0.7209374036386063, "grad_norm": 4.0273613929748535, "learning_rate": 1.9071535853103978e-06, "loss": 0.9744, "step": 9352 }, { "epoch": 0.7210144927536232, "grad_norm": 4.10391092300415, "learning_rate": 1.9061727389511226e-06, "loss": 0.8842, "step": 9353 }, { "epoch": 0.7210915818686402, "grad_norm": 3.6492302417755127, "learning_rate": 1.9051920854759543e-06, "loss": 0.9429, "step": 9354 }, { "epoch": 0.7211686709836571, "grad_norm": 3.974447011947632, "learning_rate": 1.9042116249460307e-06, "loss": 0.9477, "step": 9355 }, { "epoch": 0.721245760098674, "grad_norm": 3.8912482261657715, "learning_rate": 1.903231357422481e-06, "loss": 0.8826, "step": 9356 }, { "epoch": 0.721322849213691, "grad_norm": 3.689361810684204, "learning_rate": 1.9022512829664153e-06, "loss": 0.9352, "step": 9357 }, { "epoch": 0.721399938328708, "grad_norm": 3.682281732559204, "learning_rate": 1.9012714016389388e-06, "loss": 0.8895, "step": 9358 }, { "epoch": 0.721477027443725, "grad_norm": 3.9238123893737793, "learning_rate": 1.9002917135011413e-06, "loss": 0.9899, "step": 9359 }, { "epoch": 0.7215541165587419, "grad_norm": 3.783236503601074, "learning_rate": 1.899312218614102e-06, "loss": 0.9622, "step": 9360 }, { "epoch": 0.7216312056737588, "grad_norm": 3.7680461406707764, "learning_rate": 1.898332917038887e-06, "loss": 0.9219, "step": 9361 }, { "epoch": 0.7217082947887759, "grad_norm": 3.3629398345947266, "learning_rate": 1.8973538088365507e-06, "loss": 0.8667, "step": 9362 }, { "epoch": 0.7217853839037928, "grad_norm": 3.6810696125030518, "learning_rate": 1.8963748940681349e-06, "loss": 1.0191, "step": 9363 }, { "epoch": 0.7218624730188098, "grad_norm": 4.012876987457275, "learning_rate": 1.8953961727946706e-06, "loss": 0.8726, "step": 9364 }, { "epoch": 0.7219395621338267, "grad_norm": 3.692685842514038, "learning_rate": 
1.8944176450771761e-06, "loss": 0.8478, "step": 9365 }, { "epoch": 0.7220166512488436, "grad_norm": 3.4944231510162354, "learning_rate": 1.893439310976659e-06, "loss": 0.8727, "step": 9366 }, { "epoch": 0.7220937403638606, "grad_norm": 3.8247053623199463, "learning_rate": 1.8924611705541095e-06, "loss": 0.9374, "step": 9367 }, { "epoch": 0.7221708294788776, "grad_norm": 4.079497814178467, "learning_rate": 1.8914832238705117e-06, "loss": 0.9631, "step": 9368 }, { "epoch": 0.7222479185938946, "grad_norm": 4.07694149017334, "learning_rate": 1.8905054709868354e-06, "loss": 1.0642, "step": 9369 }, { "epoch": 0.7223250077089115, "grad_norm": 3.5892646312713623, "learning_rate": 1.8895279119640387e-06, "loss": 0.9717, "step": 9370 }, { "epoch": 0.7224020968239284, "grad_norm": 3.906541347503662, "learning_rate": 1.8885505468630673e-06, "loss": 0.9333, "step": 9371 }, { "epoch": 0.7224791859389454, "grad_norm": 4.1830830574035645, "learning_rate": 1.887573375744856e-06, "loss": 0.993, "step": 9372 }, { "epoch": 0.7225562750539624, "grad_norm": 3.726923942565918, "learning_rate": 1.8865963986703234e-06, "loss": 0.9537, "step": 9373 }, { "epoch": 0.7226333641689794, "grad_norm": 3.74369215965271, "learning_rate": 1.885619615700381e-06, "loss": 0.9731, "step": 9374 }, { "epoch": 0.7227104532839963, "grad_norm": 4.148539066314697, "learning_rate": 1.8846430268959253e-06, "loss": 1.0761, "step": 9375 }, { "epoch": 0.7227875423990132, "grad_norm": 3.4325528144836426, "learning_rate": 1.883666632317842e-06, "loss": 0.846, "step": 9376 }, { "epoch": 0.7228646315140302, "grad_norm": 3.434077024459839, "learning_rate": 1.8826904320270068e-06, "loss": 0.7253, "step": 9377 }, { "epoch": 0.7229417206290472, "grad_norm": 3.7879035472869873, "learning_rate": 1.8817144260842757e-06, "loss": 0.8692, "step": 9378 }, { "epoch": 0.7230188097440642, "grad_norm": 3.6962223052978516, "learning_rate": 1.8807386145505002e-06, "loss": 0.8621, "step": 9379 }, { "epoch": 0.7230958988590811, "grad_norm": 3.504754066467285, "learning_rate": 1.8797629974865172e-06, "loss": 0.8153, "step": 9380 }, { "epoch": 0.723172987974098, "grad_norm": 3.4584434032440186, "learning_rate": 1.8787875749531509e-06, "loss": 0.9573, "step": 9381 }, { "epoch": 0.723250077089115, "grad_norm": 3.89050555229187, "learning_rate": 1.8778123470112141e-06, "loss": 0.8954, "step": 9382 }, { "epoch": 0.723327166204132, "grad_norm": 3.5211403369903564, "learning_rate": 1.8768373137215096e-06, "loss": 0.8079, "step": 9383 }, { "epoch": 0.723404255319149, "grad_norm": 3.81539249420166, "learning_rate": 1.8758624751448213e-06, "loss": 0.9524, "step": 9384 }, { "epoch": 0.7234813444341659, "grad_norm": 3.4943387508392334, "learning_rate": 1.8748878313419271e-06, "loss": 0.8294, "step": 9385 }, { "epoch": 0.7235584335491828, "grad_norm": 3.9146976470947266, "learning_rate": 1.873913382373591e-06, "loss": 0.9008, "step": 9386 }, { "epoch": 0.7236355226641998, "grad_norm": 3.7792716026306152, "learning_rate": 1.872939128300566e-06, "loss": 1.0432, "step": 9387 }, { "epoch": 0.7237126117792168, "grad_norm": 4.022826671600342, "learning_rate": 1.8719650691835917e-06, "loss": 0.9682, "step": 9388 }, { "epoch": 0.7237897008942338, "grad_norm": 3.6135926246643066, "learning_rate": 1.8709912050833933e-06, "loss": 0.8809, "step": 9389 }, { "epoch": 0.7238667900092507, "grad_norm": 3.3770127296447754, "learning_rate": 1.8700175360606882e-06, "loss": 0.8576, "step": 9390 }, { "epoch": 0.7239438791242676, "grad_norm": 3.8401501178741455, "learning_rate": 
1.869044062176179e-06, "loss": 0.975, "step": 9391 }, { "epoch": 0.7240209682392846, "grad_norm": 3.574070453643799, "learning_rate": 1.8680707834905565e-06, "loss": 0.8732, "step": 9392 }, { "epoch": 0.7240980573543015, "grad_norm": 3.733696699142456, "learning_rate": 1.8670977000645018e-06, "loss": 1.0079, "step": 9393 }, { "epoch": 0.7241751464693186, "grad_norm": 3.893634557723999, "learning_rate": 1.8661248119586784e-06, "loss": 0.9188, "step": 9394 }, { "epoch": 0.7242522355843355, "grad_norm": 3.5450663566589355, "learning_rate": 1.865152119233742e-06, "loss": 0.9227, "step": 9395 }, { "epoch": 0.7243293246993524, "grad_norm": 4.02310848236084, "learning_rate": 1.864179621950335e-06, "loss": 0.8968, "step": 9396 }, { "epoch": 0.7244064138143694, "grad_norm": 3.913698434829712, "learning_rate": 1.8632073201690882e-06, "loss": 0.8523, "step": 9397 }, { "epoch": 0.7244835029293863, "grad_norm": 3.8382716178894043, "learning_rate": 1.8622352139506184e-06, "loss": 0.8597, "step": 9398 }, { "epoch": 0.7245605920444034, "grad_norm": 4.016849040985107, "learning_rate": 1.8612633033555345e-06, "loss": 0.7716, "step": 9399 }, { "epoch": 0.7246376811594203, "grad_norm": 3.819669485092163, "learning_rate": 1.8602915884444257e-06, "loss": 0.9032, "step": 9400 }, { "epoch": 0.7247147702744372, "grad_norm": 4.067957878112793, "learning_rate": 1.859320069277875e-06, "loss": 0.9601, "step": 9401 }, { "epoch": 0.7247918593894542, "grad_norm": 3.471204996109009, "learning_rate": 1.8583487459164528e-06, "loss": 0.9013, "step": 9402 }, { "epoch": 0.7248689485044711, "grad_norm": 3.366598606109619, "learning_rate": 1.8573776184207148e-06, "loss": 0.9391, "step": 9403 }, { "epoch": 0.7249460376194882, "grad_norm": 3.657421827316284, "learning_rate": 1.8564066868512082e-06, "loss": 0.8154, "step": 9404 }, { "epoch": 0.7250231267345051, "grad_norm": 3.919306755065918, "learning_rate": 1.8554359512684617e-06, "loss": 0.9312, "step": 9405 }, { "epoch": 0.725100215849522, "grad_norm": 3.4179515838623047, "learning_rate": 1.8544654117329958e-06, "loss": 0.8741, "step": 9406 }, { "epoch": 0.725177304964539, "grad_norm": 3.9625890254974365, "learning_rate": 1.8534950683053215e-06, "loss": 0.8815, "step": 9407 }, { "epoch": 0.7252543940795559, "grad_norm": 3.6159121990203857, "learning_rate": 1.8525249210459345e-06, "loss": 0.8737, "step": 9408 }, { "epoch": 0.725331483194573, "grad_norm": 3.8844573497772217, "learning_rate": 1.8515549700153185e-06, "loss": 0.8763, "step": 9409 }, { "epoch": 0.7254085723095899, "grad_norm": 3.9960150718688965, "learning_rate": 1.8505852152739423e-06, "loss": 1.0845, "step": 9410 }, { "epoch": 0.7254856614246068, "grad_norm": 3.7577638626098633, "learning_rate": 1.849615656882267e-06, "loss": 1.0138, "step": 9411 }, { "epoch": 0.7255627505396238, "grad_norm": 3.612480401992798, "learning_rate": 1.8486462949007388e-06, "loss": 0.9291, "step": 9412 }, { "epoch": 0.7256398396546407, "grad_norm": 3.4609127044677734, "learning_rate": 1.8476771293897932e-06, "loss": 0.9554, "step": 9413 }, { "epoch": 0.7257169287696578, "grad_norm": 3.4932973384857178, "learning_rate": 1.846708160409854e-06, "loss": 0.96, "step": 9414 }, { "epoch": 0.7257940178846747, "grad_norm": 3.711120367050171, "learning_rate": 1.8457393880213282e-06, "loss": 0.9477, "step": 9415 }, { "epoch": 0.7258711069996916, "grad_norm": 3.978654384613037, "learning_rate": 1.8447708122846148e-06, "loss": 0.9769, "step": 9416 }, { "epoch": 0.7259481961147086, "grad_norm": 3.9434118270874023, "learning_rate": 
1.8438024332601002e-06, "loss": 0.9147, "step": 9417 }, { "epoch": 0.7260252852297255, "grad_norm": 3.865705966949463, "learning_rate": 1.8428342510081571e-06, "loss": 0.9856, "step": 9418 }, { "epoch": 0.7261023743447426, "grad_norm": 3.8399107456207275, "learning_rate": 1.8418662655891472e-06, "loss": 0.9694, "step": 9419 }, { "epoch": 0.7261794634597595, "grad_norm": 3.621166467666626, "learning_rate": 1.8408984770634209e-06, "loss": 0.9042, "step": 9420 }, { "epoch": 0.7262565525747764, "grad_norm": 3.5802974700927734, "learning_rate": 1.8399308854913118e-06, "loss": 0.8997, "step": 9421 }, { "epoch": 0.7263336416897934, "grad_norm": 3.3973002433776855, "learning_rate": 1.8389634909331449e-06, "loss": 0.8711, "step": 9422 }, { "epoch": 0.7264107308048103, "grad_norm": 3.4591875076293945, "learning_rate": 1.8379962934492335e-06, "loss": 0.8588, "step": 9423 }, { "epoch": 0.7264878199198274, "grad_norm": 3.682715654373169, "learning_rate": 1.8370292930998768e-06, "loss": 0.859, "step": 9424 }, { "epoch": 0.7265649090348443, "grad_norm": 3.5783348083496094, "learning_rate": 1.8360624899453638e-06, "loss": 0.7877, "step": 9425 }, { "epoch": 0.7266419981498612, "grad_norm": 3.5001380443573, "learning_rate": 1.8350958840459665e-06, "loss": 0.8595, "step": 9426 }, { "epoch": 0.7267190872648782, "grad_norm": 3.755657434463501, "learning_rate": 1.8341294754619487e-06, "loss": 0.9371, "step": 9427 }, { "epoch": 0.7267961763798951, "grad_norm": 4.00222635269165, "learning_rate": 1.8331632642535623e-06, "loss": 0.9562, "step": 9428 }, { "epoch": 0.7268732654949122, "grad_norm": 3.519622802734375, "learning_rate": 1.8321972504810448e-06, "loss": 0.9217, "step": 9429 }, { "epoch": 0.7269503546099291, "grad_norm": 3.830078363418579, "learning_rate": 1.8312314342046222e-06, "loss": 0.9358, "step": 9430 }, { "epoch": 0.727027443724946, "grad_norm": 3.737502336502075, "learning_rate": 1.8302658154845099e-06, "loss": 0.9166, "step": 9431 }, { "epoch": 0.727104532839963, "grad_norm": 3.694373369216919, "learning_rate": 1.8293003943809062e-06, "loss": 0.8676, "step": 9432 }, { "epoch": 0.7271816219549799, "grad_norm": 3.707840919494629, "learning_rate": 1.828335170954001e-06, "loss": 0.8953, "step": 9433 }, { "epoch": 0.727258711069997, "grad_norm": 3.8381030559539795, "learning_rate": 1.8273701452639713e-06, "loss": 0.9618, "step": 9434 }, { "epoch": 0.7273358001850139, "grad_norm": 3.7728703022003174, "learning_rate": 1.8264053173709817e-06, "loss": 0.8564, "step": 9435 }, { "epoch": 0.7274128893000308, "grad_norm": 3.9917378425598145, "learning_rate": 1.825440687335186e-06, "loss": 1.0076, "step": 9436 }, { "epoch": 0.7274899784150478, "grad_norm": 3.8551135063171387, "learning_rate": 1.82447625521672e-06, "loss": 0.9929, "step": 9437 }, { "epoch": 0.7275670675300647, "grad_norm": 3.5121772289276123, "learning_rate": 1.8235120210757134e-06, "loss": 0.8955, "step": 9438 }, { "epoch": 0.7276441566450818, "grad_norm": 4.1116509437561035, "learning_rate": 1.8225479849722804e-06, "loss": 0.9207, "step": 9439 }, { "epoch": 0.7277212457600987, "grad_norm": 3.6008291244506836, "learning_rate": 1.8215841469665247e-06, "loss": 0.9125, "step": 9440 }, { "epoch": 0.7277983348751156, "grad_norm": 3.791553020477295, "learning_rate": 1.8206205071185373e-06, "loss": 0.8539, "step": 9441 }, { "epoch": 0.7278754239901326, "grad_norm": 3.53488826751709, "learning_rate": 1.8196570654883932e-06, "loss": 0.8545, "step": 9442 }, { "epoch": 0.7279525131051495, "grad_norm": 3.721651554107666, "learning_rate": 
1.8186938221361594e-06, "loss": 1.0581, "step": 9443 }, { "epoch": 0.7280296022201665, "grad_norm": 3.7198684215545654, "learning_rate": 1.8177307771218894e-06, "loss": 0.8781, "step": 9444 }, { "epoch": 0.7281066913351835, "grad_norm": 3.6171069145202637, "learning_rate": 1.8167679305056247e-06, "loss": 0.9443, "step": 9445 }, { "epoch": 0.7281837804502004, "grad_norm": 3.718351125717163, "learning_rate": 1.8158052823473927e-06, "loss": 0.9222, "step": 9446 }, { "epoch": 0.7282608695652174, "grad_norm": 3.887049913406372, "learning_rate": 1.8148428327072114e-06, "loss": 0.9062, "step": 9447 }, { "epoch": 0.7283379586802343, "grad_norm": 3.688441514968872, "learning_rate": 1.8138805816450815e-06, "loss": 0.9262, "step": 9448 }, { "epoch": 0.7284150477952513, "grad_norm": 3.61232852935791, "learning_rate": 1.812918529220996e-06, "loss": 0.8991, "step": 9449 }, { "epoch": 0.7284921369102683, "grad_norm": 3.5513792037963867, "learning_rate": 1.8119566754949324e-06, "loss": 0.8965, "step": 9450 }, { "epoch": 0.7285692260252852, "grad_norm": 3.7241334915161133, "learning_rate": 1.8109950205268624e-06, "loss": 0.8641, "step": 9451 }, { "epoch": 0.7286463151403022, "grad_norm": 3.3493247032165527, "learning_rate": 1.8100335643767347e-06, "loss": 0.9665, "step": 9452 }, { "epoch": 0.7287234042553191, "grad_norm": 3.7719924449920654, "learning_rate": 1.809072307104493e-06, "loss": 0.8762, "step": 9453 }, { "epoch": 0.7288004933703361, "grad_norm": 3.6483349800109863, "learning_rate": 1.8081112487700665e-06, "loss": 0.8928, "step": 9454 }, { "epoch": 0.7288775824853531, "grad_norm": 3.8290278911590576, "learning_rate": 1.8071503894333725e-06, "loss": 0.9746, "step": 9455 }, { "epoch": 0.72895467160037, "grad_norm": 3.715212345123291, "learning_rate": 1.8061897291543157e-06, "loss": 0.8306, "step": 9456 }, { "epoch": 0.729031760715387, "grad_norm": 3.781547784805298, "learning_rate": 1.8052292679927896e-06, "loss": 0.8701, "step": 9457 }, { "epoch": 0.7291088498304039, "grad_norm": 3.933978796005249, "learning_rate": 1.804269006008671e-06, "loss": 0.9048, "step": 9458 }, { "epoch": 0.7291859389454209, "grad_norm": 3.5607521533966064, "learning_rate": 1.803308943261829e-06, "loss": 0.861, "step": 9459 }, { "epoch": 0.7292630280604379, "grad_norm": 3.4771389961242676, "learning_rate": 1.802349079812118e-06, "loss": 0.9354, "step": 9460 }, { "epoch": 0.7293401171754548, "grad_norm": 3.6316308975219727, "learning_rate": 1.8013894157193807e-06, "loss": 1.0234, "step": 9461 }, { "epoch": 0.7294172062904718, "grad_norm": 3.486682176589966, "learning_rate": 1.8004299510434493e-06, "loss": 0.8481, "step": 9462 }, { "epoch": 0.7294942954054887, "grad_norm": 3.479135036468506, "learning_rate": 1.7994706858441375e-06, "loss": 0.9336, "step": 9463 }, { "epoch": 0.7295713845205057, "grad_norm": 3.551004409790039, "learning_rate": 1.7985116201812524e-06, "loss": 0.937, "step": 9464 }, { "epoch": 0.7296484736355227, "grad_norm": 3.698634386062622, "learning_rate": 1.7975527541145865e-06, "loss": 0.9793, "step": 9465 }, { "epoch": 0.7297255627505396, "grad_norm": 3.525210380554199, "learning_rate": 1.7965940877039211e-06, "loss": 1.0082, "step": 9466 }, { "epoch": 0.7298026518655566, "grad_norm": 3.7876412868499756, "learning_rate": 1.7956356210090236e-06, "loss": 0.8984, "step": 9467 }, { "epoch": 0.7298797409805735, "grad_norm": 4.059507369995117, "learning_rate": 1.7946773540896506e-06, "loss": 0.9558, "step": 9468 }, { "epoch": 0.7299568300955905, "grad_norm": 3.954590320587158, "learning_rate": 
1.793719287005542e-06, "loss": 1.0237, "step": 9469 }, { "epoch": 0.7300339192106075, "grad_norm": 4.025545597076416, "learning_rate": 1.7927614198164306e-06, "loss": 1.0038, "step": 9470 }, { "epoch": 0.7301110083256244, "grad_norm": 3.4767184257507324, "learning_rate": 1.7918037525820336e-06, "loss": 0.832, "step": 9471 }, { "epoch": 0.7301880974406414, "grad_norm": 3.732330322265625, "learning_rate": 1.7908462853620568e-06, "loss": 0.9538, "step": 9472 }, { "epoch": 0.7302651865556583, "grad_norm": 3.6224257946014404, "learning_rate": 1.7898890182161954e-06, "loss": 0.9717, "step": 9473 }, { "epoch": 0.7303422756706753, "grad_norm": 3.483696460723877, "learning_rate": 1.7889319512041264e-06, "loss": 0.8152, "step": 9474 }, { "epoch": 0.7304193647856922, "grad_norm": 3.9409186840057373, "learning_rate": 1.7879750843855197e-06, "loss": 0.9242, "step": 9475 }, { "epoch": 0.7304964539007093, "grad_norm": 3.357489585876465, "learning_rate": 1.7870184178200312e-06, "loss": 0.9181, "step": 9476 }, { "epoch": 0.7305735430157262, "grad_norm": 3.646585702896118, "learning_rate": 1.7860619515673034e-06, "loss": 0.9368, "step": 9477 }, { "epoch": 0.7306506321307431, "grad_norm": 3.7453439235687256, "learning_rate": 1.7851056856869681e-06, "loss": 0.9319, "step": 9478 }, { "epoch": 0.7307277212457601, "grad_norm": 3.725774049758911, "learning_rate": 1.7841496202386437e-06, "loss": 0.8849, "step": 9479 }, { "epoch": 0.730804810360777, "grad_norm": 3.687467098236084, "learning_rate": 1.7831937552819345e-06, "loss": 0.9166, "step": 9480 }, { "epoch": 0.7308818994757941, "grad_norm": 3.666307210922241, "learning_rate": 1.7822380908764336e-06, "loss": 0.768, "step": 9481 }, { "epoch": 0.730958988590811, "grad_norm": 3.638786554336548, "learning_rate": 1.781282627081723e-06, "loss": 0.8445, "step": 9482 }, { "epoch": 0.7310360777058279, "grad_norm": 3.7186498641967773, "learning_rate": 1.7803273639573704e-06, "loss": 0.9877, "step": 9483 }, { "epoch": 0.7311131668208449, "grad_norm": 4.096242904663086, "learning_rate": 1.7793723015629333e-06, "loss": 0.9338, "step": 9484 }, { "epoch": 0.7311902559358618, "grad_norm": 3.3239645957946777, "learning_rate": 1.7784174399579513e-06, "loss": 0.7036, "step": 9485 }, { "epoch": 0.7312673450508789, "grad_norm": 3.5666675567626953, "learning_rate": 1.7774627792019567e-06, "loss": 0.8775, "step": 9486 }, { "epoch": 0.7313444341658958, "grad_norm": 3.7199814319610596, "learning_rate": 1.7765083193544679e-06, "loss": 0.974, "step": 9487 }, { "epoch": 0.7314215232809127, "grad_norm": 3.733372688293457, "learning_rate": 1.775554060474991e-06, "loss": 0.9032, "step": 9488 }, { "epoch": 0.7314986123959297, "grad_norm": 4.118587493896484, "learning_rate": 1.7746000026230198e-06, "loss": 0.8837, "step": 9489 }, { "epoch": 0.7315757015109466, "grad_norm": 3.630469560623169, "learning_rate": 1.7736461458580324e-06, "loss": 0.8297, "step": 9490 }, { "epoch": 0.7316527906259637, "grad_norm": 3.6379334926605225, "learning_rate": 1.7726924902394976e-06, "loss": 0.7553, "step": 9491 }, { "epoch": 0.7317298797409806, "grad_norm": 3.688769817352295, "learning_rate": 1.7717390358268716e-06, "loss": 0.9635, "step": 9492 }, { "epoch": 0.7318069688559975, "grad_norm": 3.543370485305786, "learning_rate": 1.7707857826795971e-06, "loss": 0.8589, "step": 9493 }, { "epoch": 0.7318840579710145, "grad_norm": 3.616456985473633, "learning_rate": 1.7698327308571045e-06, "loss": 0.8856, "step": 9494 }, { "epoch": 0.7319611470860314, "grad_norm": 3.9555208683013916, "learning_rate": 
1.7688798804188118e-06, "loss": 0.9327, "step": 9495 }, { "epoch": 0.7320382362010485, "grad_norm": 3.7948670387268066, "learning_rate": 1.767927231424124e-06, "loss": 0.9202, "step": 9496 }, { "epoch": 0.7321153253160654, "grad_norm": 3.7444069385528564, "learning_rate": 1.766974783932434e-06, "loss": 0.8917, "step": 9497 }, { "epoch": 0.7321924144310823, "grad_norm": 3.439579486846924, "learning_rate": 1.766022538003122e-06, "loss": 0.8429, "step": 9498 }, { "epoch": 0.7322695035460993, "grad_norm": 3.7228710651397705, "learning_rate": 1.7650704936955577e-06, "loss": 0.8607, "step": 9499 }, { "epoch": 0.7323465926611162, "grad_norm": 3.5966405868530273, "learning_rate": 1.7641186510690916e-06, "loss": 0.8669, "step": 9500 }, { "epoch": 0.7324236817761333, "grad_norm": 3.7146801948547363, "learning_rate": 1.763167010183069e-06, "loss": 0.8685, "step": 9501 }, { "epoch": 0.7325007708911502, "grad_norm": 3.474900484085083, "learning_rate": 1.7622155710968187e-06, "loss": 0.8428, "step": 9502 }, { "epoch": 0.7325778600061671, "grad_norm": 4.253158092498779, "learning_rate": 1.761264333869659e-06, "loss": 0.925, "step": 9503 }, { "epoch": 0.7326549491211841, "grad_norm": 4.116722106933594, "learning_rate": 1.7603132985608945e-06, "loss": 0.8248, "step": 9504 }, { "epoch": 0.732732038236201, "grad_norm": 4.452220916748047, "learning_rate": 1.7593624652298186e-06, "loss": 1.0207, "step": 9505 }, { "epoch": 0.7328091273512181, "grad_norm": 3.5835022926330566, "learning_rate": 1.7584118339357076e-06, "loss": 0.8986, "step": 9506 }, { "epoch": 0.732886216466235, "grad_norm": 3.83974289894104, "learning_rate": 1.7574614047378297e-06, "loss": 0.9099, "step": 9507 }, { "epoch": 0.7329633055812519, "grad_norm": 3.6427395343780518, "learning_rate": 1.7565111776954401e-06, "loss": 0.9213, "step": 9508 }, { "epoch": 0.7330403946962689, "grad_norm": 3.9767303466796875, "learning_rate": 1.7555611528677803e-06, "loss": 0.9611, "step": 9509 }, { "epoch": 0.7331174838112858, "grad_norm": 3.73007869720459, "learning_rate": 1.7546113303140806e-06, "loss": 0.9217, "step": 9510 }, { "epoch": 0.7331945729263029, "grad_norm": 3.5436551570892334, "learning_rate": 1.7536617100935544e-06, "loss": 0.8044, "step": 9511 }, { "epoch": 0.7332716620413198, "grad_norm": 3.655442237854004, "learning_rate": 1.7527122922654077e-06, "loss": 0.8905, "step": 9512 }, { "epoch": 0.7333487511563367, "grad_norm": 3.6758716106414795, "learning_rate": 1.751763076888831e-06, "loss": 0.9612, "step": 9513 }, { "epoch": 0.7334258402713537, "grad_norm": 3.906611204147339, "learning_rate": 1.7508140640230037e-06, "loss": 0.9355, "step": 9514 }, { "epoch": 0.7335029293863706, "grad_norm": 3.8882009983062744, "learning_rate": 1.7498652537270916e-06, "loss": 0.8655, "step": 9515 }, { "epoch": 0.7335800185013877, "grad_norm": 3.8022232055664062, "learning_rate": 1.7489166460602496e-06, "loss": 0.9754, "step": 9516 }, { "epoch": 0.7336571076164046, "grad_norm": 3.5740768909454346, "learning_rate": 1.7479682410816156e-06, "loss": 0.9293, "step": 9517 }, { "epoch": 0.7337341967314215, "grad_norm": 3.5046918392181396, "learning_rate": 1.7470200388503184e-06, "loss": 0.9062, "step": 9518 }, { "epoch": 0.7338112858464385, "grad_norm": 3.434080123901367, "learning_rate": 1.7460720394254748e-06, "loss": 0.8451, "step": 9519 }, { "epoch": 0.7338883749614554, "grad_norm": 3.992218255996704, "learning_rate": 1.7451242428661868e-06, "loss": 0.9591, "step": 9520 }, { "epoch": 0.7339654640764725, "grad_norm": 3.7113566398620605, "learning_rate": 
1.7441766492315465e-06, "loss": 0.9268, "step": 9521 }, { "epoch": 0.7340425531914894, "grad_norm": 3.7934823036193848, "learning_rate": 1.7432292585806277e-06, "loss": 0.8757, "step": 9522 }, { "epoch": 0.7341196423065063, "grad_norm": 3.7156882286071777, "learning_rate": 1.742282070972498e-06, "loss": 0.9287, "step": 9523 }, { "epoch": 0.7341967314215233, "grad_norm": 3.4963877201080322, "learning_rate": 1.7413350864662088e-06, "loss": 0.8622, "step": 9524 }, { "epoch": 0.7342738205365402, "grad_norm": 3.512899160385132, "learning_rate": 1.7403883051207997e-06, "loss": 0.8646, "step": 9525 }, { "epoch": 0.7343509096515572, "grad_norm": 3.7643868923187256, "learning_rate": 1.739441726995298e-06, "loss": 0.9348, "step": 9526 }, { "epoch": 0.7344279987665742, "grad_norm": 3.798492431640625, "learning_rate": 1.7384953521487191e-06, "loss": 0.9234, "step": 9527 }, { "epoch": 0.7345050878815911, "grad_norm": 3.6906044483184814, "learning_rate": 1.737549180640062e-06, "loss": 0.983, "step": 9528 }, { "epoch": 0.7345821769966081, "grad_norm": 3.6873204708099365, "learning_rate": 1.7366032125283167e-06, "loss": 0.9359, "step": 9529 }, { "epoch": 0.734659266111625, "grad_norm": 3.9488134384155273, "learning_rate": 1.7356574478724593e-06, "loss": 0.9906, "step": 9530 }, { "epoch": 0.734736355226642, "grad_norm": 3.8538029193878174, "learning_rate": 1.7347118867314538e-06, "loss": 0.9494, "step": 9531 }, { "epoch": 0.734813444341659, "grad_norm": 3.5995075702667236, "learning_rate": 1.7337665291642524e-06, "loss": 0.8652, "step": 9532 }, { "epoch": 0.7348905334566759, "grad_norm": 3.970221519470215, "learning_rate": 1.7328213752297902e-06, "loss": 0.9299, "step": 9533 }, { "epoch": 0.7349676225716929, "grad_norm": 3.456852674484253, "learning_rate": 1.7318764249869934e-06, "loss": 0.9381, "step": 9534 }, { "epoch": 0.7350447116867098, "grad_norm": 3.492973566055298, "learning_rate": 1.730931678494776e-06, "loss": 0.9255, "step": 9535 }, { "epoch": 0.7351218008017268, "grad_norm": 3.6652021408081055, "learning_rate": 1.7299871358120373e-06, "loss": 0.9341, "step": 9536 }, { "epoch": 0.7351988899167438, "grad_norm": 3.6004602909088135, "learning_rate": 1.729042796997667e-06, "loss": 0.9393, "step": 9537 }, { "epoch": 0.7352759790317607, "grad_norm": 3.624760150909424, "learning_rate": 1.7280986621105355e-06, "loss": 0.9113, "step": 9538 }, { "epoch": 0.7353530681467777, "grad_norm": 3.834989309310913, "learning_rate": 1.7271547312095055e-06, "loss": 0.9726, "step": 9539 }, { "epoch": 0.7354301572617946, "grad_norm": 3.8131542205810547, "learning_rate": 1.7262110043534285e-06, "loss": 0.9446, "step": 9540 }, { "epoch": 0.7355072463768116, "grad_norm": 3.5170748233795166, "learning_rate": 1.7252674816011405e-06, "loss": 0.8592, "step": 9541 }, { "epoch": 0.7355843354918286, "grad_norm": 3.7116787433624268, "learning_rate": 1.7243241630114665e-06, "loss": 0.9016, "step": 9542 }, { "epoch": 0.7356614246068455, "grad_norm": 3.888503313064575, "learning_rate": 1.723381048643214e-06, "loss": 0.938, "step": 9543 }, { "epoch": 0.7357385137218625, "grad_norm": 3.5734593868255615, "learning_rate": 1.722438138555183e-06, "loss": 0.8522, "step": 9544 }, { "epoch": 0.7358156028368794, "grad_norm": 3.421564817428589, "learning_rate": 1.7214954328061588e-06, "loss": 0.9318, "step": 9545 }, { "epoch": 0.7358926919518964, "grad_norm": 3.7123959064483643, "learning_rate": 1.7205529314549153e-06, "loss": 0.9123, "step": 9546 }, { "epoch": 0.7359697810669134, "grad_norm": 3.348281145095825, "learning_rate": 
1.7196106345602126e-06, "loss": 0.8201, "step": 9547 }, { "epoch": 0.7360468701819303, "grad_norm": 4.12083625793457, "learning_rate": 1.7186685421807964e-06, "loss": 0.9747, "step": 9548 }, { "epoch": 0.7361239592969473, "grad_norm": 3.7178947925567627, "learning_rate": 1.7177266543754018e-06, "loss": 0.9056, "step": 9549 }, { "epoch": 0.7362010484119642, "grad_norm": 4.042630672454834, "learning_rate": 1.7167849712027507e-06, "loss": 0.9108, "step": 9550 }, { "epoch": 0.7362781375269812, "grad_norm": 3.405878782272339, "learning_rate": 1.7158434927215528e-06, "loss": 0.8586, "step": 9551 }, { "epoch": 0.7363552266419982, "grad_norm": 3.563534736633301, "learning_rate": 1.7149022189905041e-06, "loss": 0.8012, "step": 9552 }, { "epoch": 0.7364323157570151, "grad_norm": 3.4462010860443115, "learning_rate": 1.7139611500682896e-06, "loss": 0.874, "step": 9553 }, { "epoch": 0.7365094048720321, "grad_norm": 3.5920019149780273, "learning_rate": 1.713020286013577e-06, "loss": 0.8469, "step": 9554 }, { "epoch": 0.736586493987049, "grad_norm": 3.872889995574951, "learning_rate": 1.7120796268850254e-06, "loss": 0.8842, "step": 9555 }, { "epoch": 0.736663583102066, "grad_norm": 4.169905185699463, "learning_rate": 1.7111391727412807e-06, "loss": 1.0117, "step": 9556 }, { "epoch": 0.736740672217083, "grad_norm": 4.358822345733643, "learning_rate": 1.7101989236409754e-06, "loss": 0.9126, "step": 9557 }, { "epoch": 0.7368177613320999, "grad_norm": 3.7961339950561523, "learning_rate": 1.7092588796427306e-06, "loss": 1.0046, "step": 9558 }, { "epoch": 0.7368948504471169, "grad_norm": 3.6711761951446533, "learning_rate": 1.7083190408051498e-06, "loss": 0.8484, "step": 9559 }, { "epoch": 0.7369719395621338, "grad_norm": 3.702301502227783, "learning_rate": 1.7073794071868283e-06, "loss": 0.8271, "step": 9560 }, { "epoch": 0.7370490286771508, "grad_norm": 3.758284330368042, "learning_rate": 1.7064399788463482e-06, "loss": 1.0023, "step": 9561 }, { "epoch": 0.7371261177921677, "grad_norm": 3.5002236366271973, "learning_rate": 1.7055007558422776e-06, "loss": 0.8328, "step": 9562 }, { "epoch": 0.7372032069071847, "grad_norm": 3.4732120037078857, "learning_rate": 1.7045617382331726e-06, "loss": 0.794, "step": 9563 }, { "epoch": 0.7372802960222017, "grad_norm": 4.162208080291748, "learning_rate": 1.7036229260775766e-06, "loss": 1.033, "step": 9564 }, { "epoch": 0.7373573851372186, "grad_norm": 3.8932626247406006, "learning_rate": 1.7026843194340176e-06, "loss": 0.9436, "step": 9565 }, { "epoch": 0.7374344742522356, "grad_norm": 3.9444522857666016, "learning_rate": 1.7017459183610142e-06, "loss": 0.9912, "step": 9566 }, { "epoch": 0.7375115633672525, "grad_norm": 3.844452381134033, "learning_rate": 1.70080772291707e-06, "loss": 0.8048, "step": 9567 }, { "epoch": 0.7375886524822695, "grad_norm": 3.734955072402954, "learning_rate": 1.6998697331606777e-06, "loss": 0.9294, "step": 9568 }, { "epoch": 0.7376657415972865, "grad_norm": 3.940300226211548, "learning_rate": 1.6989319491503175e-06, "loss": 0.9772, "step": 9569 }, { "epoch": 0.7377428307123034, "grad_norm": 3.8375890254974365, "learning_rate": 1.697994370944452e-06, "loss": 0.9545, "step": 9570 }, { "epoch": 0.7378199198273204, "grad_norm": 3.75097393989563, "learning_rate": 1.6970569986015356e-06, "loss": 0.9672, "step": 9571 }, { "epoch": 0.7378970089423373, "grad_norm": 3.56791353225708, "learning_rate": 1.6961198321800092e-06, "loss": 0.9334, "step": 9572 }, { "epoch": 0.7379740980573543, "grad_norm": 3.3650386333465576, "learning_rate": 1.6951828717383e-06, 
"loss": 0.8871, "step": 9573 }, { "epoch": 0.7380511871723713, "grad_norm": 3.7570347785949707, "learning_rate": 1.694246117334823e-06, "loss": 0.9818, "step": 9574 }, { "epoch": 0.7381282762873882, "grad_norm": 3.496140241622925, "learning_rate": 1.6933095690279805e-06, "loss": 0.907, "step": 9575 }, { "epoch": 0.7382053654024052, "grad_norm": 3.67641019821167, "learning_rate": 1.6923732268761594e-06, "loss": 0.8191, "step": 9576 }, { "epoch": 0.7382824545174221, "grad_norm": 3.6642329692840576, "learning_rate": 1.691437090937737e-06, "loss": 0.8536, "step": 9577 }, { "epoch": 0.738359543632439, "grad_norm": 3.9450578689575195, "learning_rate": 1.6905011612710764e-06, "loss": 0.8556, "step": 9578 }, { "epoch": 0.7384366327474561, "grad_norm": 4.003316402435303, "learning_rate": 1.6895654379345282e-06, "loss": 0.9955, "step": 9579 }, { "epoch": 0.738513721862473, "grad_norm": 3.8521080017089844, "learning_rate": 1.6886299209864316e-06, "loss": 0.888, "step": 9580 }, { "epoch": 0.73859081097749, "grad_norm": 3.9132065773010254, "learning_rate": 1.687694610485107e-06, "loss": 0.9501, "step": 9581 }, { "epoch": 0.7386679000925069, "grad_norm": 3.6591548919677734, "learning_rate": 1.6867595064888693e-06, "loss": 0.8745, "step": 9582 }, { "epoch": 0.7387449892075239, "grad_norm": 4.419125556945801, "learning_rate": 1.685824609056015e-06, "loss": 0.9704, "step": 9583 }, { "epoch": 0.7388220783225409, "grad_norm": 3.558927297592163, "learning_rate": 1.6848899182448347e-06, "loss": 0.9493, "step": 9584 }, { "epoch": 0.7388991674375578, "grad_norm": 3.7360217571258545, "learning_rate": 1.6839554341135973e-06, "loss": 0.9127, "step": 9585 }, { "epoch": 0.7389762565525748, "grad_norm": 4.130924701690674, "learning_rate": 1.683021156720564e-06, "loss": 0.8786, "step": 9586 }, { "epoch": 0.7390533456675917, "grad_norm": 3.717590808868408, "learning_rate": 1.6820870861239824e-06, "loss": 0.8723, "step": 9587 }, { "epoch": 0.7391304347826086, "grad_norm": 3.745699405670166, "learning_rate": 1.6811532223820875e-06, "loss": 0.8268, "step": 9588 }, { "epoch": 0.7392075238976257, "grad_norm": 3.7815754413604736, "learning_rate": 1.6802195655531e-06, "loss": 0.9505, "step": 9589 }, { "epoch": 0.7392846130126426, "grad_norm": 4.17587947845459, "learning_rate": 1.6792861156952312e-06, "loss": 0.9202, "step": 9590 }, { "epoch": 0.7393617021276596, "grad_norm": 3.6216659545898438, "learning_rate": 1.6783528728666725e-06, "loss": 0.8253, "step": 9591 }, { "epoch": 0.7394387912426765, "grad_norm": 3.885458469390869, "learning_rate": 1.677419837125609e-06, "loss": 1.0538, "step": 9592 }, { "epoch": 0.7395158803576934, "grad_norm": 3.8355486392974854, "learning_rate": 1.676487008530211e-06, "loss": 0.9182, "step": 9593 }, { "epoch": 0.7395929694727105, "grad_norm": 4.003592014312744, "learning_rate": 1.6755543871386342e-06, "loss": 0.9035, "step": 9594 }, { "epoch": 0.7396700585877274, "grad_norm": 3.761218309402466, "learning_rate": 1.6746219730090262e-06, "loss": 0.8466, "step": 9595 }, { "epoch": 0.7397471477027444, "grad_norm": 3.5276873111724854, "learning_rate": 1.6736897661995132e-06, "loss": 0.8705, "step": 9596 }, { "epoch": 0.7398242368177613, "grad_norm": 3.678708791732788, "learning_rate": 1.6727577667682165e-06, "loss": 0.8528, "step": 9597 }, { "epoch": 0.7399013259327782, "grad_norm": 3.7199313640594482, "learning_rate": 1.6718259747732407e-06, "loss": 0.9636, "step": 9598 }, { "epoch": 0.7399784150477953, "grad_norm": 3.733581304550171, "learning_rate": 1.6708943902726783e-06, "loss": 0.9271, "step": 
9599 }, { "epoch": 0.7400555041628122, "grad_norm": 3.6299502849578857, "learning_rate": 1.6699630133246087e-06, "loss": 0.9428, "step": 9600 }, { "epoch": 0.7401325932778292, "grad_norm": 3.8488755226135254, "learning_rate": 1.6690318439871e-06, "loss": 0.7749, "step": 9601 }, { "epoch": 0.7402096823928461, "grad_norm": 3.60003399848938, "learning_rate": 1.6681008823182027e-06, "loss": 0.9667, "step": 9602 }, { "epoch": 0.740286771507863, "grad_norm": 3.882335662841797, "learning_rate": 1.6671701283759596e-06, "loss": 0.9634, "step": 9603 }, { "epoch": 0.7403638606228801, "grad_norm": 3.8450510501861572, "learning_rate": 1.6662395822183975e-06, "loss": 0.7925, "step": 9604 }, { "epoch": 0.740440949737897, "grad_norm": 3.650480031967163, "learning_rate": 1.6653092439035312e-06, "loss": 0.8782, "step": 9605 }, { "epoch": 0.740518038852914, "grad_norm": 3.5009539127349854, "learning_rate": 1.6643791134893644e-06, "loss": 0.952, "step": 9606 }, { "epoch": 0.7405951279679309, "grad_norm": 3.5242550373077393, "learning_rate": 1.6634491910338829e-06, "loss": 0.9471, "step": 9607 }, { "epoch": 0.7406722170829478, "grad_norm": 3.7609641551971436, "learning_rate": 1.6625194765950636e-06, "loss": 0.8637, "step": 9608 }, { "epoch": 0.7407493061979649, "grad_norm": 3.836843729019165, "learning_rate": 1.6615899702308696e-06, "loss": 0.9317, "step": 9609 }, { "epoch": 0.7408263953129818, "grad_norm": 4.039276599884033, "learning_rate": 1.6606606719992513e-06, "loss": 0.9741, "step": 9610 }, { "epoch": 0.7409034844279988, "grad_norm": 3.873875379562378, "learning_rate": 1.6597315819581449e-06, "loss": 0.8736, "step": 9611 }, { "epoch": 0.7409805735430157, "grad_norm": 3.878174066543579, "learning_rate": 1.6588027001654765e-06, "loss": 0.9261, "step": 9612 }, { "epoch": 0.7410576626580326, "grad_norm": 3.606624126434326, "learning_rate": 1.6578740266791532e-06, "loss": 0.8731, "step": 9613 }, { "epoch": 0.7411347517730497, "grad_norm": 3.7116219997406006, "learning_rate": 1.6569455615570757e-06, "loss": 0.8865, "step": 9614 }, { "epoch": 0.7412118408880666, "grad_norm": 3.811680316925049, "learning_rate": 1.6560173048571277e-06, "loss": 0.938, "step": 9615 }, { "epoch": 0.7412889300030836, "grad_norm": 3.9601168632507324, "learning_rate": 1.6550892566371823e-06, "loss": 0.9875, "step": 9616 }, { "epoch": 0.7413660191181005, "grad_norm": 3.370882034301758, "learning_rate": 1.654161416955099e-06, "loss": 0.878, "step": 9617 }, { "epoch": 0.7414431082331174, "grad_norm": 4.08239221572876, "learning_rate": 1.653233785868722e-06, "loss": 1.0293, "step": 9618 }, { "epoch": 0.7415201973481345, "grad_norm": 3.562781572341919, "learning_rate": 1.6523063634358844e-06, "loss": 0.8308, "step": 9619 }, { "epoch": 0.7415972864631514, "grad_norm": 3.801961898803711, "learning_rate": 1.6513791497144071e-06, "loss": 0.9176, "step": 9620 }, { "epoch": 0.7416743755781684, "grad_norm": 3.9425201416015625, "learning_rate": 1.6504521447620969e-06, "loss": 0.9052, "step": 9621 }, { "epoch": 0.7417514646931853, "grad_norm": 3.5563175678253174, "learning_rate": 1.649525348636748e-06, "loss": 1.0543, "step": 9622 }, { "epoch": 0.7418285538082022, "grad_norm": 3.3809213638305664, "learning_rate": 1.6485987613961423e-06, "loss": 0.8341, "step": 9623 }, { "epoch": 0.7419056429232193, "grad_norm": 3.588660717010498, "learning_rate": 1.6476723830980451e-06, "loss": 0.7934, "step": 9624 }, { "epoch": 0.7419827320382362, "grad_norm": 3.640718460083008, "learning_rate": 1.6467462138002126e-06, "loss": 0.9354, "step": 9625 }, { "epoch": 
0.7420598211532532, "grad_norm": 3.681979179382324, "learning_rate": 1.6458202535603867e-06, "loss": 0.9655, "step": 9626 }, { "epoch": 0.7421369102682701, "grad_norm": 4.223269939422607, "learning_rate": 1.6448945024362962e-06, "loss": 0.965, "step": 9627 }, { "epoch": 0.742213999383287, "grad_norm": 3.7559590339660645, "learning_rate": 1.6439689604856568e-06, "loss": 0.9417, "step": 9628 }, { "epoch": 0.742291088498304, "grad_norm": 3.631321668624878, "learning_rate": 1.6430436277661715e-06, "loss": 0.8356, "step": 9629 }, { "epoch": 0.742368177613321, "grad_norm": 3.696200370788574, "learning_rate": 1.6421185043355304e-06, "loss": 0.9243, "step": 9630 }, { "epoch": 0.742445266728338, "grad_norm": 3.6229546070098877, "learning_rate": 1.6411935902514086e-06, "loss": 0.8577, "step": 9631 }, { "epoch": 0.7425223558433549, "grad_norm": 3.96211314201355, "learning_rate": 1.6402688855714733e-06, "loss": 0.9646, "step": 9632 }, { "epoch": 0.7425994449583718, "grad_norm": 3.6700477600097656, "learning_rate": 1.6393443903533707e-06, "loss": 0.9983, "step": 9633 }, { "epoch": 0.7426765340733888, "grad_norm": 4.209187030792236, "learning_rate": 1.6384201046547399e-06, "loss": 1.0566, "step": 9634 }, { "epoch": 0.7427536231884058, "grad_norm": 3.9726881980895996, "learning_rate": 1.6374960285332053e-06, "loss": 1.0066, "step": 9635 }, { "epoch": 0.7428307123034228, "grad_norm": 3.917154550552368, "learning_rate": 1.6365721620463786e-06, "loss": 0.9755, "step": 9636 }, { "epoch": 0.7429078014184397, "grad_norm": 4.186873435974121, "learning_rate": 1.6356485052518578e-06, "loss": 0.9928, "step": 9637 }, { "epoch": 0.7429848905334566, "grad_norm": 3.5882840156555176, "learning_rate": 1.6347250582072305e-06, "loss": 0.8609, "step": 9638 }, { "epoch": 0.7430619796484736, "grad_norm": 3.6521730422973633, "learning_rate": 1.6338018209700647e-06, "loss": 0.9443, "step": 9639 }, { "epoch": 0.7431390687634906, "grad_norm": 3.2607851028442383, "learning_rate": 1.6328787935979207e-06, "loss": 0.8569, "step": 9640 }, { "epoch": 0.7432161578785076, "grad_norm": 3.7790210247039795, "learning_rate": 1.6319559761483461e-06, "loss": 0.8764, "step": 9641 }, { "epoch": 0.7432932469935245, "grad_norm": 3.771569013595581, "learning_rate": 1.631033368678872e-06, "loss": 0.9934, "step": 9642 }, { "epoch": 0.7433703361085414, "grad_norm": 3.3746719360351562, "learning_rate": 1.6301109712470214e-06, "loss": 0.7936, "step": 9643 }, { "epoch": 0.7434474252235584, "grad_norm": 3.9737672805786133, "learning_rate": 1.6291887839102966e-06, "loss": 0.9538, "step": 9644 }, { "epoch": 0.7435245143385754, "grad_norm": 4.225729465484619, "learning_rate": 1.6282668067261935e-06, "loss": 0.8667, "step": 9645 }, { "epoch": 0.7436016034535924, "grad_norm": 3.5801405906677246, "learning_rate": 1.6273450397521922e-06, "loss": 0.8745, "step": 9646 }, { "epoch": 0.7436786925686093, "grad_norm": 3.57384991645813, "learning_rate": 1.6264234830457603e-06, "loss": 0.8146, "step": 9647 }, { "epoch": 0.7437557816836263, "grad_norm": 3.640131711959839, "learning_rate": 1.625502136664352e-06, "loss": 0.9404, "step": 9648 }, { "epoch": 0.7438328707986432, "grad_norm": 3.9445488452911377, "learning_rate": 1.62458100066541e-06, "loss": 1.0291, "step": 9649 }, { "epoch": 0.7439099599136602, "grad_norm": 3.508561134338379, "learning_rate": 1.6236600751063597e-06, "loss": 0.8162, "step": 9650 }, { "epoch": 0.7439870490286772, "grad_norm": 3.63757586479187, "learning_rate": 1.6227393600446168e-06, "loss": 0.8668, "step": 9651 }, { "epoch": 
0.7440641381436941, "grad_norm": 3.6220133304595947, "learning_rate": 1.6218188555375836e-06, "loss": 0.8303, "step": 9652 }, { "epoch": 0.7441412272587111, "grad_norm": 3.732778310775757, "learning_rate": 1.6208985616426488e-06, "loss": 0.9131, "step": 9653 }, { "epoch": 0.744218316373728, "grad_norm": 3.7289443016052246, "learning_rate": 1.619978478417189e-06, "loss": 1.0164, "step": 9654 }, { "epoch": 0.744295405488745, "grad_norm": 3.5976722240448, "learning_rate": 1.6190586059185642e-06, "loss": 0.8991, "step": 9655 }, { "epoch": 0.744372494603762, "grad_norm": 3.97445011138916, "learning_rate": 1.618138944204125e-06, "loss": 0.9376, "step": 9656 }, { "epoch": 0.7444495837187789, "grad_norm": 3.541771173477173, "learning_rate": 1.617219493331208e-06, "loss": 0.929, "step": 9657 }, { "epoch": 0.7445266728337959, "grad_norm": 3.4124934673309326, "learning_rate": 1.6163002533571348e-06, "loss": 0.8519, "step": 9658 }, { "epoch": 0.7446037619488128, "grad_norm": 3.9390342235565186, "learning_rate": 1.6153812243392169e-06, "loss": 0.8352, "step": 9659 }, { "epoch": 0.7446808510638298, "grad_norm": 3.7517385482788086, "learning_rate": 1.6144624063347514e-06, "loss": 1.0166, "step": 9660 }, { "epoch": 0.7447579401788468, "grad_norm": 3.8233649730682373, "learning_rate": 1.6135437994010195e-06, "loss": 0.8967, "step": 9661 }, { "epoch": 0.7448350292938637, "grad_norm": 4.343902111053467, "learning_rate": 1.6126254035952926e-06, "loss": 0.9621, "step": 9662 }, { "epoch": 0.7449121184088807, "grad_norm": 3.828294038772583, "learning_rate": 1.6117072189748285e-06, "loss": 0.9517, "step": 9663 }, { "epoch": 0.7449892075238976, "grad_norm": 4.02465295791626, "learning_rate": 1.6107892455968704e-06, "loss": 0.9339, "step": 9664 }, { "epoch": 0.7450662966389145, "grad_norm": 3.764190673828125, "learning_rate": 1.6098714835186512e-06, "loss": 0.9529, "step": 9665 }, { "epoch": 0.7451433857539316, "grad_norm": 3.6371004581451416, "learning_rate": 1.6089539327973857e-06, "loss": 0.8936, "step": 9666 }, { "epoch": 0.7452204748689485, "grad_norm": 3.513137102127075, "learning_rate": 1.6080365934902798e-06, "loss": 0.9337, "step": 9667 }, { "epoch": 0.7452975639839655, "grad_norm": 4.115825653076172, "learning_rate": 1.6071194656545246e-06, "loss": 0.9739, "step": 9668 }, { "epoch": 0.7453746530989824, "grad_norm": 4.290143966674805, "learning_rate": 1.6062025493472988e-06, "loss": 1.0652, "step": 9669 }, { "epoch": 0.7454517422139993, "grad_norm": 3.6070821285247803, "learning_rate": 1.6052858446257674e-06, "loss": 0.866, "step": 9670 }, { "epoch": 0.7455288313290164, "grad_norm": 3.7635834217071533, "learning_rate": 1.6043693515470833e-06, "loss": 0.8835, "step": 9671 }, { "epoch": 0.7456059204440333, "grad_norm": 3.815763473510742, "learning_rate": 1.6034530701683804e-06, "loss": 0.8308, "step": 9672 }, { "epoch": 0.7456830095590503, "grad_norm": 3.4556825160980225, "learning_rate": 1.6025370005467889e-06, "loss": 0.8331, "step": 9673 }, { "epoch": 0.7457600986740672, "grad_norm": 3.467095375061035, "learning_rate": 1.6016211427394196e-06, "loss": 0.7999, "step": 9674 }, { "epoch": 0.7458371877890841, "grad_norm": 4.090667724609375, "learning_rate": 1.600705496803373e-06, "loss": 0.8447, "step": 9675 }, { "epoch": 0.7459142769041012, "grad_norm": 3.751192092895508, "learning_rate": 1.5997900627957318e-06, "loss": 0.9856, "step": 9676 }, { "epoch": 0.7459913660191181, "grad_norm": 3.7970805168151855, "learning_rate": 1.5988748407735698e-06, "loss": 0.8911, "step": 9677 }, { "epoch": 0.7460684551341351, 
"grad_norm": 3.705199956893921, "learning_rate": 1.597959830793947e-06, "loss": 0.8308, "step": 9678 }, { "epoch": 0.746145544249152, "grad_norm": 3.4573512077331543, "learning_rate": 1.5970450329139087e-06, "loss": 0.9452, "step": 9679 }, { "epoch": 0.7462226333641689, "grad_norm": 3.6191141605377197, "learning_rate": 1.5961304471904897e-06, "loss": 0.953, "step": 9680 }, { "epoch": 0.746299722479186, "grad_norm": 4.017669677734375, "learning_rate": 1.595216073680707e-06, "loss": 0.9276, "step": 9681 }, { "epoch": 0.7463768115942029, "grad_norm": 3.8973758220672607, "learning_rate": 1.5943019124415687e-06, "loss": 0.8892, "step": 9682 }, { "epoch": 0.7464539007092199, "grad_norm": 3.952500104904175, "learning_rate": 1.593387963530067e-06, "loss": 0.9615, "step": 9683 }, { "epoch": 0.7465309898242368, "grad_norm": 3.6438992023468018, "learning_rate": 1.5924742270031823e-06, "loss": 0.8843, "step": 9684 }, { "epoch": 0.7466080789392537, "grad_norm": 3.6509311199188232, "learning_rate": 1.591560702917882e-06, "loss": 0.9018, "step": 9685 }, { "epoch": 0.7466851680542708, "grad_norm": 4.112860202789307, "learning_rate": 1.5906473913311204e-06, "loss": 0.9559, "step": 9686 }, { "epoch": 0.7467622571692877, "grad_norm": 3.7320613861083984, "learning_rate": 1.589734292299835e-06, "loss": 0.9533, "step": 9687 }, { "epoch": 0.7468393462843047, "grad_norm": 3.6934304237365723, "learning_rate": 1.5888214058809543e-06, "loss": 0.9533, "step": 9688 }, { "epoch": 0.7469164353993216, "grad_norm": 3.7583377361297607, "learning_rate": 1.587908732131392e-06, "loss": 0.8712, "step": 9689 }, { "epoch": 0.7469935245143385, "grad_norm": 3.579685926437378, "learning_rate": 1.5869962711080483e-06, "loss": 0.9753, "step": 9690 }, { "epoch": 0.7470706136293556, "grad_norm": 3.7619788646698, "learning_rate": 1.5860840228678126e-06, "loss": 0.9095, "step": 9691 }, { "epoch": 0.7471477027443725, "grad_norm": 3.7893290519714355, "learning_rate": 1.5851719874675552e-06, "loss": 0.9442, "step": 9692 }, { "epoch": 0.7472247918593895, "grad_norm": 3.8225247859954834, "learning_rate": 1.5842601649641388e-06, "loss": 0.8569, "step": 9693 }, { "epoch": 0.7473018809744064, "grad_norm": 3.59032940864563, "learning_rate": 1.58334855541441e-06, "loss": 0.9241, "step": 9694 }, { "epoch": 0.7473789700894233, "grad_norm": 3.8772151470184326, "learning_rate": 1.5824371588752042e-06, "loss": 0.8766, "step": 9695 }, { "epoch": 0.7474560592044404, "grad_norm": 3.8452553749084473, "learning_rate": 1.5815259754033407e-06, "loss": 0.9639, "step": 9696 }, { "epoch": 0.7475331483194573, "grad_norm": 4.228387355804443, "learning_rate": 1.5806150050556302e-06, "loss": 1.0822, "step": 9697 }, { "epoch": 0.7476102374344743, "grad_norm": 3.8701577186584473, "learning_rate": 1.579704247888863e-06, "loss": 0.9458, "step": 9698 }, { "epoch": 0.7476873265494912, "grad_norm": 4.092501640319824, "learning_rate": 1.5787937039598217e-06, "loss": 0.9963, "step": 9699 }, { "epoch": 0.7477644156645081, "grad_norm": 3.4330146312713623, "learning_rate": 1.5778833733252735e-06, "loss": 0.889, "step": 9700 }, { "epoch": 0.7478415047795252, "grad_norm": 3.8023056983947754, "learning_rate": 1.5769732560419742e-06, "loss": 0.8818, "step": 9701 }, { "epoch": 0.7479185938945421, "grad_norm": 3.714808464050293, "learning_rate": 1.5760633521666652e-06, "loss": 1.0705, "step": 9702 }, { "epoch": 0.7479956830095591, "grad_norm": 4.097212314605713, "learning_rate": 1.5751536617560715e-06, "loss": 0.8879, "step": 9703 }, { "epoch": 0.748072772124576, "grad_norm": 
3.7691214084625244, "learning_rate": 1.57424418486691e-06, "loss": 1.0334, "step": 9704 }, { "epoch": 0.7481498612395929, "grad_norm": 3.9305317401885986, "learning_rate": 1.5733349215558802e-06, "loss": 1.0065, "step": 9705 }, { "epoch": 0.74822695035461, "grad_norm": 4.348329067230225, "learning_rate": 1.5724258718796714e-06, "loss": 1.1138, "step": 9706 }, { "epoch": 0.7483040394696269, "grad_norm": 4.216433048248291, "learning_rate": 1.5715170358949572e-06, "loss": 1.008, "step": 9707 }, { "epoch": 0.7483811285846439, "grad_norm": 3.6003470420837402, "learning_rate": 1.5706084136584016e-06, "loss": 0.8693, "step": 9708 }, { "epoch": 0.7484582176996608, "grad_norm": 4.085456848144531, "learning_rate": 1.5697000052266475e-06, "loss": 0.8981, "step": 9709 }, { "epoch": 0.7485353068146777, "grad_norm": 3.8469676971435547, "learning_rate": 1.5687918106563326e-06, "loss": 0.838, "step": 9710 }, { "epoch": 0.7486123959296948, "grad_norm": 3.8500454425811768, "learning_rate": 1.5678838300040783e-06, "loss": 0.9659, "step": 9711 }, { "epoch": 0.7486894850447117, "grad_norm": 4.028532028198242, "learning_rate": 1.5669760633264908e-06, "loss": 0.9956, "step": 9712 }, { "epoch": 0.7487665741597287, "grad_norm": 3.699516534805298, "learning_rate": 1.5660685106801677e-06, "loss": 0.9479, "step": 9713 }, { "epoch": 0.7488436632747456, "grad_norm": 3.5519814491271973, "learning_rate": 1.5651611721216865e-06, "loss": 0.8591, "step": 9714 }, { "epoch": 0.7489207523897625, "grad_norm": 3.9628407955169678, "learning_rate": 1.5642540477076169e-06, "loss": 1.0351, "step": 9715 }, { "epoch": 0.7489978415047795, "grad_norm": 3.8148396015167236, "learning_rate": 1.5633471374945113e-06, "loss": 1.0393, "step": 9716 }, { "epoch": 0.7490749306197965, "grad_norm": 4.2555999755859375, "learning_rate": 1.5624404415389166e-06, "loss": 0.8896, "step": 9717 }, { "epoch": 0.7491520197348135, "grad_norm": 3.7964389324188232, "learning_rate": 1.5615339598973544e-06, "loss": 0.9671, "step": 9718 }, { "epoch": 0.7492291088498304, "grad_norm": 3.563861608505249, "learning_rate": 1.560627692626342e-06, "loss": 0.9568, "step": 9719 }, { "epoch": 0.7493061979648473, "grad_norm": 4.131255626678467, "learning_rate": 1.55972163978238e-06, "loss": 0.8972, "step": 9720 }, { "epoch": 0.7493832870798643, "grad_norm": 3.94917893409729, "learning_rate": 1.5588158014219563e-06, "loss": 0.9817, "step": 9721 }, { "epoch": 0.7494603761948813, "grad_norm": 3.44014835357666, "learning_rate": 1.5579101776015443e-06, "loss": 0.8567, "step": 9722 }, { "epoch": 0.7495374653098983, "grad_norm": 3.9482109546661377, "learning_rate": 1.557004768377608e-06, "loss": 0.9041, "step": 9723 }, { "epoch": 0.7496145544249152, "grad_norm": 3.8712923526763916, "learning_rate": 1.556099573806591e-06, "loss": 0.9819, "step": 9724 }, { "epoch": 0.7496916435399321, "grad_norm": 3.6934661865234375, "learning_rate": 1.5551945939449287e-06, "loss": 0.9745, "step": 9725 }, { "epoch": 0.7497687326549491, "grad_norm": 3.556561231613159, "learning_rate": 1.5542898288490426e-06, "loss": 0.9324, "step": 9726 }, { "epoch": 0.7498458217699661, "grad_norm": 3.59233021736145, "learning_rate": 1.5533852785753401e-06, "loss": 0.8831, "step": 9727 }, { "epoch": 0.7499229108849831, "grad_norm": 3.663569450378418, "learning_rate": 1.5524809431802162e-06, "loss": 1.0045, "step": 9728 }, { "epoch": 0.75, "grad_norm": 3.9396040439605713, "learning_rate": 1.551576822720049e-06, "loss": 0.9329, "step": 9729 }, { "epoch": 0.7500770891150169, "grad_norm": 3.6608591079711914, 
"learning_rate": 1.550672917251207e-06, "loss": 0.8829, "step": 9730 }, { "epoch": 0.7501541782300339, "grad_norm": 3.617097854614258, "learning_rate": 1.5497692268300442e-06, "loss": 0.9165, "step": 9731 }, { "epoch": 0.7502312673450509, "grad_norm": 3.6227474212646484, "learning_rate": 1.5488657515129001e-06, "loss": 0.8429, "step": 9732 }, { "epoch": 0.7503083564600679, "grad_norm": 3.4037904739379883, "learning_rate": 1.5479624913561037e-06, "loss": 0.8714, "step": 9733 }, { "epoch": 0.7503854455750848, "grad_norm": 4.304060459136963, "learning_rate": 1.5470594464159682e-06, "loss": 0.9096, "step": 9734 }, { "epoch": 0.7504625346901017, "grad_norm": 3.9946959018707275, "learning_rate": 1.5461566167487918e-06, "loss": 0.9868, "step": 9735 }, { "epoch": 0.7505396238051187, "grad_norm": 3.8413825035095215, "learning_rate": 1.5452540024108625e-06, "loss": 0.9049, "step": 9736 }, { "epoch": 0.7506167129201357, "grad_norm": 3.6537039279937744, "learning_rate": 1.5443516034584533e-06, "loss": 0.9031, "step": 9737 }, { "epoch": 0.7506938020351527, "grad_norm": 3.7991750240325928, "learning_rate": 1.5434494199478245e-06, "loss": 0.8957, "step": 9738 }, { "epoch": 0.7507708911501696, "grad_norm": 3.8140668869018555, "learning_rate": 1.5425474519352234e-06, "loss": 0.9202, "step": 9739 }, { "epoch": 0.7508479802651865, "grad_norm": 3.7266762256622314, "learning_rate": 1.5416456994768813e-06, "loss": 0.919, "step": 9740 }, { "epoch": 0.7509250693802035, "grad_norm": 3.538825511932373, "learning_rate": 1.5407441626290181e-06, "loss": 0.872, "step": 9741 }, { "epoch": 0.7510021584952205, "grad_norm": 3.42117977142334, "learning_rate": 1.5398428414478407e-06, "loss": 0.7402, "step": 9742 }, { "epoch": 0.7510792476102375, "grad_norm": 3.397956132888794, "learning_rate": 1.5389417359895415e-06, "loss": 0.8549, "step": 9743 }, { "epoch": 0.7511563367252544, "grad_norm": 3.6643259525299072, "learning_rate": 1.5380408463102998e-06, "loss": 0.8128, "step": 9744 }, { "epoch": 0.7512334258402713, "grad_norm": 3.688371419906616, "learning_rate": 1.5371401724662826e-06, "loss": 0.8901, "step": 9745 }, { "epoch": 0.7513105149552883, "grad_norm": 3.9233813285827637, "learning_rate": 1.5362397145136398e-06, "loss": 0.9399, "step": 9746 }, { "epoch": 0.7513876040703052, "grad_norm": 3.741755247116089, "learning_rate": 1.5353394725085113e-06, "loss": 0.8973, "step": 9747 }, { "epoch": 0.7514646931853223, "grad_norm": 3.6673974990844727, "learning_rate": 1.5344394465070234e-06, "loss": 0.8966, "step": 9748 }, { "epoch": 0.7515417823003392, "grad_norm": 3.5368854999542236, "learning_rate": 1.5335396365652865e-06, "loss": 0.9082, "step": 9749 }, { "epoch": 0.7516188714153561, "grad_norm": 3.6051862239837646, "learning_rate": 1.5326400427394023e-06, "loss": 0.9469, "step": 9750 }, { "epoch": 0.7516959605303731, "grad_norm": 3.7850236892700195, "learning_rate": 1.5317406650854515e-06, "loss": 0.8775, "step": 9751 }, { "epoch": 0.75177304964539, "grad_norm": 3.5623207092285156, "learning_rate": 1.5308415036595076e-06, "loss": 0.799, "step": 9752 }, { "epoch": 0.7518501387604071, "grad_norm": 4.15452241897583, "learning_rate": 1.5299425585176292e-06, "loss": 0.9851, "step": 9753 }, { "epoch": 0.751927227875424, "grad_norm": 3.781522274017334, "learning_rate": 1.52904382971586e-06, "loss": 1.033, "step": 9754 }, { "epoch": 0.7520043169904409, "grad_norm": 3.5940423011779785, "learning_rate": 1.528145317310231e-06, "loss": 0.9551, "step": 9755 }, { "epoch": 0.7520814061054579, "grad_norm": 3.518636465072632, 
"learning_rate": 1.527247021356763e-06, "loss": 0.8302, "step": 9756 }, { "epoch": 0.7521584952204748, "grad_norm": 3.90459942817688, "learning_rate": 1.5263489419114552e-06, "loss": 0.9807, "step": 9757 }, { "epoch": 0.7522355843354919, "grad_norm": 3.519850492477417, "learning_rate": 1.5254510790303e-06, "loss": 0.9089, "step": 9758 }, { "epoch": 0.7523126734505088, "grad_norm": 3.474034547805786, "learning_rate": 1.5245534327692751e-06, "loss": 0.8635, "step": 9759 }, { "epoch": 0.7523897625655257, "grad_norm": 3.7472171783447266, "learning_rate": 1.5236560031843445e-06, "loss": 0.8809, "step": 9760 }, { "epoch": 0.7524668516805427, "grad_norm": 3.900770902633667, "learning_rate": 1.5227587903314568e-06, "loss": 0.9003, "step": 9761 }, { "epoch": 0.7525439407955596, "grad_norm": 3.990259885787964, "learning_rate": 1.5218617942665497e-06, "loss": 0.8674, "step": 9762 }, { "epoch": 0.7526210299105767, "grad_norm": 3.8502142429351807, "learning_rate": 1.5209650150455462e-06, "loss": 0.8873, "step": 9763 }, { "epoch": 0.7526981190255936, "grad_norm": 3.540637969970703, "learning_rate": 1.5200684527243552e-06, "loss": 0.8397, "step": 9764 }, { "epoch": 0.7527752081406105, "grad_norm": 3.576173782348633, "learning_rate": 1.5191721073588755e-06, "loss": 0.9391, "step": 9765 }, { "epoch": 0.7528522972556275, "grad_norm": 3.8144445419311523, "learning_rate": 1.518275979004985e-06, "loss": 0.9871, "step": 9766 }, { "epoch": 0.7529293863706444, "grad_norm": 4.009185314178467, "learning_rate": 1.517380067718555e-06, "loss": 0.9879, "step": 9767 }, { "epoch": 0.7530064754856615, "grad_norm": 3.6188137531280518, "learning_rate": 1.5164843735554408e-06, "loss": 0.8921, "step": 9768 }, { "epoch": 0.7530835646006784, "grad_norm": 3.5967025756835938, "learning_rate": 1.5155888965714843e-06, "loss": 0.8747, "step": 9769 }, { "epoch": 0.7531606537156953, "grad_norm": 3.705009937286377, "learning_rate": 1.514693636822514e-06, "loss": 0.8078, "step": 9770 }, { "epoch": 0.7532377428307123, "grad_norm": 3.469721794128418, "learning_rate": 1.5137985943643463e-06, "loss": 0.8209, "step": 9771 }, { "epoch": 0.7533148319457292, "grad_norm": 3.523388385772705, "learning_rate": 1.5129037692527794e-06, "loss": 0.8536, "step": 9772 }, { "epoch": 0.7533919210607463, "grad_norm": 4.049753665924072, "learning_rate": 1.5120091615436016e-06, "loss": 0.8941, "step": 9773 }, { "epoch": 0.7534690101757632, "grad_norm": 3.7952821254730225, "learning_rate": 1.5111147712925884e-06, "loss": 0.884, "step": 9774 }, { "epoch": 0.7535460992907801, "grad_norm": 3.82209849357605, "learning_rate": 1.5102205985554992e-06, "loss": 0.7733, "step": 9775 }, { "epoch": 0.7536231884057971, "grad_norm": 3.5352354049682617, "learning_rate": 1.5093266433880837e-06, "loss": 0.8167, "step": 9776 }, { "epoch": 0.753700277520814, "grad_norm": 3.5197014808654785, "learning_rate": 1.5084329058460716e-06, "loss": 0.9545, "step": 9777 }, { "epoch": 0.7537773666358311, "grad_norm": 3.3797638416290283, "learning_rate": 1.5075393859851844e-06, "loss": 0.7952, "step": 9778 }, { "epoch": 0.753854455750848, "grad_norm": 3.590395927429199, "learning_rate": 1.5066460838611292e-06, "loss": 0.8798, "step": 9779 }, { "epoch": 0.7539315448658649, "grad_norm": 3.962991952896118, "learning_rate": 1.505752999529597e-06, "loss": 0.8392, "step": 9780 }, { "epoch": 0.7540086339808819, "grad_norm": 3.7387423515319824, "learning_rate": 1.5048601330462693e-06, "loss": 0.9752, "step": 9781 }, { "epoch": 0.7540857230958988, "grad_norm": 3.6760013103485107, "learning_rate": 
1.5039674844668112e-06, "loss": 0.8867, "step": 9782 }, { "epoch": 0.7541628122109159, "grad_norm": 3.807915449142456, "learning_rate": 1.503075053846873e-06, "loss": 0.9201, "step": 9783 }, { "epoch": 0.7542399013259328, "grad_norm": 3.836941957473755, "learning_rate": 1.5021828412420942e-06, "loss": 0.9706, "step": 9784 }, { "epoch": 0.7543169904409497, "grad_norm": 3.9428610801696777, "learning_rate": 1.501290846708099e-06, "loss": 0.893, "step": 9785 }, { "epoch": 0.7543940795559667, "grad_norm": 3.6269469261169434, "learning_rate": 1.5003990703004994e-06, "loss": 0.8229, "step": 9786 }, { "epoch": 0.7544711686709836, "grad_norm": 3.6933364868164062, "learning_rate": 1.4995075120748948e-06, "loss": 0.945, "step": 9787 }, { "epoch": 0.7545482577860007, "grad_norm": 3.697035312652588, "learning_rate": 1.498616172086866e-06, "loss": 0.9209, "step": 9788 }, { "epoch": 0.7546253469010176, "grad_norm": 3.5978927612304688, "learning_rate": 1.4977250503919839e-06, "loss": 0.961, "step": 9789 }, { "epoch": 0.7547024360160345, "grad_norm": 3.69553804397583, "learning_rate": 1.4968341470458064e-06, "loss": 0.7702, "step": 9790 }, { "epoch": 0.7547795251310515, "grad_norm": 3.7523858547210693, "learning_rate": 1.495943462103877e-06, "loss": 1.0264, "step": 9791 }, { "epoch": 0.7548566142460684, "grad_norm": 3.5115909576416016, "learning_rate": 1.495052995621724e-06, "loss": 0.8715, "step": 9792 }, { "epoch": 0.7549337033610855, "grad_norm": 4.038724422454834, "learning_rate": 1.4941627476548665e-06, "loss": 0.9478, "step": 9793 }, { "epoch": 0.7550107924761024, "grad_norm": 3.764338970184326, "learning_rate": 1.4932727182588025e-06, "loss": 0.9636, "step": 9794 }, { "epoch": 0.7550878815911193, "grad_norm": 3.6738407611846924, "learning_rate": 1.4923829074890222e-06, "loss": 0.9713, "step": 9795 }, { "epoch": 0.7551649707061363, "grad_norm": 3.553886890411377, "learning_rate": 1.491493315401002e-06, "loss": 0.8905, "step": 9796 }, { "epoch": 0.7552420598211532, "grad_norm": 3.702172040939331, "learning_rate": 1.4906039420502022e-06, "loss": 0.906, "step": 9797 }, { "epoch": 0.7553191489361702, "grad_norm": 3.901486396789551, "learning_rate": 1.4897147874920726e-06, "loss": 0.8935, "step": 9798 }, { "epoch": 0.7553962380511872, "grad_norm": 3.898955821990967, "learning_rate": 1.4888258517820442e-06, "loss": 0.83, "step": 9799 }, { "epoch": 0.7554733271662041, "grad_norm": 3.40311598777771, "learning_rate": 1.4879371349755395e-06, "loss": 0.79, "step": 9800 }, { "epoch": 0.7555504162812211, "grad_norm": 3.6599347591400146, "learning_rate": 1.4870486371279647e-06, "loss": 0.9212, "step": 9801 }, { "epoch": 0.755627505396238, "grad_norm": 3.6006016731262207, "learning_rate": 1.486160358294713e-06, "loss": 0.914, "step": 9802 }, { "epoch": 0.755704594511255, "grad_norm": 3.814122438430786, "learning_rate": 1.4852722985311647e-06, "loss": 0.965, "step": 9803 }, { "epoch": 0.755781683626272, "grad_norm": 3.5150370597839355, "learning_rate": 1.4843844578926863e-06, "loss": 0.8573, "step": 9804 }, { "epoch": 0.7558587727412889, "grad_norm": 3.7043817043304443, "learning_rate": 1.483496836434627e-06, "loss": 0.9301, "step": 9805 }, { "epoch": 0.7559358618563059, "grad_norm": 3.6749911308288574, "learning_rate": 1.4826094342123282e-06, "loss": 0.8818, "step": 9806 }, { "epoch": 0.7560129509713228, "grad_norm": 4.301516532897949, "learning_rate": 1.4817222512811146e-06, "loss": 0.9437, "step": 9807 }, { "epoch": 0.7560900400863398, "grad_norm": 4.242541313171387, "learning_rate": 1.4808352876962984e-06, 
"loss": 0.9117, "step": 9808 }, { "epoch": 0.7561671292013568, "grad_norm": 3.7353222370147705, "learning_rate": 1.4799485435131745e-06, "loss": 0.8453, "step": 9809 }, { "epoch": 0.7562442183163737, "grad_norm": 3.8031511306762695, "learning_rate": 1.4790620187870275e-06, "loss": 0.9015, "step": 9810 }, { "epoch": 0.7563213074313907, "grad_norm": 4.096917629241943, "learning_rate": 1.478175713573129e-06, "loss": 0.951, "step": 9811 }, { "epoch": 0.7563983965464076, "grad_norm": 3.349189043045044, "learning_rate": 1.477289627926734e-06, "loss": 0.8813, "step": 9812 }, { "epoch": 0.7564754856614246, "grad_norm": 3.9046249389648438, "learning_rate": 1.476403761903088e-06, "loss": 0.8709, "step": 9813 }, { "epoch": 0.7565525747764416, "grad_norm": 4.114388465881348, "learning_rate": 1.4755181155574166e-06, "loss": 0.9669, "step": 9814 }, { "epoch": 0.7566296638914586, "grad_norm": 3.496314287185669, "learning_rate": 1.4746326889449375e-06, "loss": 0.9647, "step": 9815 }, { "epoch": 0.7567067530064755, "grad_norm": 3.8805534839630127, "learning_rate": 1.4737474821208513e-06, "loss": 0.8806, "step": 9816 }, { "epoch": 0.7567838421214924, "grad_norm": 3.560910224914551, "learning_rate": 1.472862495140347e-06, "loss": 0.7968, "step": 9817 }, { "epoch": 0.7568609312365094, "grad_norm": 3.6736807823181152, "learning_rate": 1.4719777280585983e-06, "loss": 0.8541, "step": 9818 }, { "epoch": 0.7569380203515264, "grad_norm": 4.000894069671631, "learning_rate": 1.4710931809307677e-06, "loss": 0.9703, "step": 9819 }, { "epoch": 0.7570151094665434, "grad_norm": 4.015716075897217, "learning_rate": 1.4702088538119996e-06, "loss": 1.0352, "step": 9820 }, { "epoch": 0.7570921985815603, "grad_norm": 3.6512205600738525, "learning_rate": 1.4693247467574273e-06, "loss": 0.9386, "step": 9821 }, { "epoch": 0.7571692876965772, "grad_norm": 3.792320728302002, "learning_rate": 1.4684408598221722e-06, "loss": 0.9188, "step": 9822 }, { "epoch": 0.7572463768115942, "grad_norm": 3.727431297302246, "learning_rate": 1.4675571930613385e-06, "loss": 0.8135, "step": 9823 }, { "epoch": 0.7573234659266112, "grad_norm": 3.9126195907592773, "learning_rate": 1.4666737465300202e-06, "loss": 0.958, "step": 9824 }, { "epoch": 0.7574005550416282, "grad_norm": 3.749931573867798, "learning_rate": 1.4657905202832928e-06, "loss": 0.8214, "step": 9825 }, { "epoch": 0.7574776441566451, "grad_norm": 3.8023717403411865, "learning_rate": 1.4649075143762225e-06, "loss": 0.9376, "step": 9826 }, { "epoch": 0.757554733271662, "grad_norm": 3.8662075996398926, "learning_rate": 1.4640247288638603e-06, "loss": 0.9735, "step": 9827 }, { "epoch": 0.757631822386679, "grad_norm": 3.9173572063446045, "learning_rate": 1.463142163801242e-06, "loss": 0.9421, "step": 9828 }, { "epoch": 0.757708911501696, "grad_norm": 3.737765312194824, "learning_rate": 1.4622598192433928e-06, "loss": 0.9159, "step": 9829 }, { "epoch": 0.757786000616713, "grad_norm": 3.5803210735321045, "learning_rate": 1.461377695245323e-06, "loss": 0.8864, "step": 9830 }, { "epoch": 0.7578630897317299, "grad_norm": 3.9155960083007812, "learning_rate": 1.460495791862025e-06, "loss": 0.9516, "step": 9831 }, { "epoch": 0.7579401788467468, "grad_norm": 3.639129161834717, "learning_rate": 1.4596141091484828e-06, "loss": 0.8667, "step": 9832 }, { "epoch": 0.7580172679617638, "grad_norm": 3.9074134826660156, "learning_rate": 1.4587326471596647e-06, "loss": 0.9008, "step": 9833 }, { "epoch": 0.7580943570767807, "grad_norm": 3.6987130641937256, "learning_rate": 1.4578514059505256e-06, "loss": 0.9457, 
"step": 9834 }, { "epoch": 0.7581714461917978, "grad_norm": 3.8588478565216064, "learning_rate": 1.4569703855760076e-06, "loss": 0.9084, "step": 9835 }, { "epoch": 0.7582485353068147, "grad_norm": 3.9179983139038086, "learning_rate": 1.4560895860910345e-06, "loss": 0.9484, "step": 9836 }, { "epoch": 0.7583256244218316, "grad_norm": 3.7824649810791016, "learning_rate": 1.4552090075505215e-06, "loss": 0.945, "step": 9837 }, { "epoch": 0.7584027135368486, "grad_norm": 3.568527936935425, "learning_rate": 1.454328650009368e-06, "loss": 0.9535, "step": 9838 }, { "epoch": 0.7584798026518655, "grad_norm": 3.9821395874023438, "learning_rate": 1.4534485135224597e-06, "loss": 0.8473, "step": 9839 }, { "epoch": 0.7585568917668826, "grad_norm": 3.9586429595947266, "learning_rate": 1.452568598144668e-06, "loss": 0.9471, "step": 9840 }, { "epoch": 0.7586339808818995, "grad_norm": 4.391334533691406, "learning_rate": 1.4516889039308535e-06, "loss": 1.0125, "step": 9841 }, { "epoch": 0.7587110699969164, "grad_norm": 3.699589252471924, "learning_rate": 1.4508094309358573e-06, "loss": 0.8814, "step": 9842 }, { "epoch": 0.7587881591119334, "grad_norm": 4.033127784729004, "learning_rate": 1.4499301792145109e-06, "loss": 0.9453, "step": 9843 }, { "epoch": 0.7588652482269503, "grad_norm": 3.849324941635132, "learning_rate": 1.449051148821632e-06, "loss": 0.8697, "step": 9844 }, { "epoch": 0.7589423373419674, "grad_norm": 3.5151302814483643, "learning_rate": 1.4481723398120228e-06, "loss": 0.9318, "step": 9845 }, { "epoch": 0.7590194264569843, "grad_norm": 3.9107677936553955, "learning_rate": 1.4472937522404744e-06, "loss": 0.9946, "step": 9846 }, { "epoch": 0.7590965155720012, "grad_norm": 3.9752719402313232, "learning_rate": 1.4464153861617597e-06, "loss": 0.8786, "step": 9847 }, { "epoch": 0.7591736046870182, "grad_norm": 3.470489978790283, "learning_rate": 1.4455372416306407e-06, "loss": 0.8099, "step": 9848 }, { "epoch": 0.7592506938020351, "grad_norm": 4.009558200836182, "learning_rate": 1.4446593187018637e-06, "loss": 0.9382, "step": 9849 }, { "epoch": 0.7593277829170522, "grad_norm": 4.106441020965576, "learning_rate": 1.4437816174301684e-06, "loss": 0.9847, "step": 9850 }, { "epoch": 0.7594048720320691, "grad_norm": 3.834660768508911, "learning_rate": 1.442904137870269e-06, "loss": 0.8793, "step": 9851 }, { "epoch": 0.759481961147086, "grad_norm": 3.583991050720215, "learning_rate": 1.4420268800768744e-06, "loss": 0.8814, "step": 9852 }, { "epoch": 0.759559050262103, "grad_norm": 3.8111629486083984, "learning_rate": 1.4411498441046761e-06, "loss": 0.9838, "step": 9853 }, { "epoch": 0.7596361393771199, "grad_norm": 3.4147655963897705, "learning_rate": 1.4402730300083534e-06, "loss": 0.8173, "step": 9854 }, { "epoch": 0.759713228492137, "grad_norm": 3.8176896572113037, "learning_rate": 1.4393964378425712e-06, "loss": 0.8702, "step": 9855 }, { "epoch": 0.7597903176071539, "grad_norm": 3.6424262523651123, "learning_rate": 1.438520067661982e-06, "loss": 0.7725, "step": 9856 }, { "epoch": 0.7598674067221708, "grad_norm": 3.9817745685577393, "learning_rate": 1.4376439195212194e-06, "loss": 0.9865, "step": 9857 }, { "epoch": 0.7599444958371878, "grad_norm": 3.7620887756347656, "learning_rate": 1.4367679934749085e-06, "loss": 1.0232, "step": 9858 }, { "epoch": 0.7600215849522047, "grad_norm": 3.7138636112213135, "learning_rate": 1.4358922895776584e-06, "loss": 0.9584, "step": 9859 }, { "epoch": 0.7600986740672218, "grad_norm": 3.80833101272583, "learning_rate": 1.4350168078840653e-06, "loss": 0.9049, "step": 9860 
}, { "epoch": 0.7601757631822387, "grad_norm": 3.6317269802093506, "learning_rate": 1.4341415484487126e-06, "loss": 0.8555, "step": 9861 }, { "epoch": 0.7602528522972556, "grad_norm": 3.839651107788086, "learning_rate": 1.4332665113261645e-06, "loss": 0.8839, "step": 9862 }, { "epoch": 0.7603299414122726, "grad_norm": 4.206575870513916, "learning_rate": 1.4323916965709766e-06, "loss": 0.9231, "step": 9863 }, { "epoch": 0.7604070305272895, "grad_norm": 3.6243884563446045, "learning_rate": 1.4315171042376897e-06, "loss": 0.953, "step": 9864 }, { "epoch": 0.7604841196423066, "grad_norm": 3.7419850826263428, "learning_rate": 1.43064273438083e-06, "loss": 1.0172, "step": 9865 }, { "epoch": 0.7605612087573235, "grad_norm": 3.832564353942871, "learning_rate": 1.4297685870549088e-06, "loss": 0.8665, "step": 9866 }, { "epoch": 0.7606382978723404, "grad_norm": 3.582911491394043, "learning_rate": 1.4288946623144272e-06, "loss": 0.8848, "step": 9867 }, { "epoch": 0.7607153869873574, "grad_norm": 3.7686104774475098, "learning_rate": 1.4280209602138673e-06, "loss": 0.932, "step": 9868 }, { "epoch": 0.7607924761023743, "grad_norm": 3.6485867500305176, "learning_rate": 1.4271474808077e-06, "loss": 0.9416, "step": 9869 }, { "epoch": 0.7608695652173914, "grad_norm": 3.6767213344573975, "learning_rate": 1.4262742241503836e-06, "loss": 0.9576, "step": 9870 }, { "epoch": 0.7609466543324083, "grad_norm": 3.4594292640686035, "learning_rate": 1.42540119029636e-06, "loss": 0.8465, "step": 9871 }, { "epoch": 0.7610237434474252, "grad_norm": 3.935392141342163, "learning_rate": 1.4245283793000608e-06, "loss": 0.9516, "step": 9872 }, { "epoch": 0.7611008325624422, "grad_norm": 3.8447792530059814, "learning_rate": 1.4236557912158977e-06, "loss": 0.8472, "step": 9873 }, { "epoch": 0.7611779216774591, "grad_norm": 3.4628243446350098, "learning_rate": 1.4227834260982732e-06, "loss": 0.861, "step": 9874 }, { "epoch": 0.7612550107924761, "grad_norm": 3.9336140155792236, "learning_rate": 1.4219112840015759e-06, "loss": 0.9094, "step": 9875 }, { "epoch": 0.7613320999074931, "grad_norm": 4.2745041847229, "learning_rate": 1.421039364980178e-06, "loss": 0.9124, "step": 9876 }, { "epoch": 0.76140918902251, "grad_norm": 4.0710625648498535, "learning_rate": 1.4201676690884403e-06, "loss": 0.7956, "step": 9877 }, { "epoch": 0.761486278137527, "grad_norm": 3.9371089935302734, "learning_rate": 1.4192961963807094e-06, "loss": 0.9185, "step": 9878 }, { "epoch": 0.7615633672525439, "grad_norm": 3.549879789352417, "learning_rate": 1.4184249469113138e-06, "loss": 0.8869, "step": 9879 }, { "epoch": 0.761640456367561, "grad_norm": 3.896291494369507, "learning_rate": 1.417553920734574e-06, "loss": 0.8783, "step": 9880 }, { "epoch": 0.7617175454825779, "grad_norm": 3.623753070831299, "learning_rate": 1.4166831179047923e-06, "loss": 0.8513, "step": 9881 }, { "epoch": 0.7617946345975948, "grad_norm": 3.9215145111083984, "learning_rate": 1.4158125384762606e-06, "loss": 0.8208, "step": 9882 }, { "epoch": 0.7618717237126118, "grad_norm": 3.590507745742798, "learning_rate": 1.4149421825032556e-06, "loss": 0.918, "step": 9883 }, { "epoch": 0.7619488128276287, "grad_norm": 4.319372177124023, "learning_rate": 1.4140720500400363e-06, "loss": 0.9689, "step": 9884 }, { "epoch": 0.7620259019426457, "grad_norm": 3.8288538455963135, "learning_rate": 1.4132021411408526e-06, "loss": 0.9748, "step": 9885 }, { "epoch": 0.7621029910576627, "grad_norm": 4.058238506317139, "learning_rate": 1.4123324558599389e-06, "loss": 1.0233, "step": 9886 }, { "epoch": 
0.7621800801726796, "grad_norm": 3.779918670654297, "learning_rate": 1.4114629942515156e-06, "loss": 0.8905, "step": 9887 }, { "epoch": 0.7622571692876966, "grad_norm": 3.943232774734497, "learning_rate": 1.410593756369789e-06, "loss": 0.8821, "step": 9888 }, { "epoch": 0.7623342584027135, "grad_norm": 3.8813998699188232, "learning_rate": 1.4097247422689537e-06, "loss": 0.9586, "step": 9889 }, { "epoch": 0.7624113475177305, "grad_norm": 4.069021224975586, "learning_rate": 1.408855952003184e-06, "loss": 0.9016, "step": 9890 }, { "epoch": 0.7624884366327475, "grad_norm": 3.904691457748413, "learning_rate": 1.4079873856266468e-06, "loss": 0.8317, "step": 9891 }, { "epoch": 0.7625655257477644, "grad_norm": 3.729013681411743, "learning_rate": 1.4071190431934934e-06, "loss": 1.0032, "step": 9892 }, { "epoch": 0.7626426148627814, "grad_norm": 3.8653934001922607, "learning_rate": 1.4062509247578586e-06, "loss": 0.9653, "step": 9893 }, { "epoch": 0.7627197039777983, "grad_norm": 3.8940982818603516, "learning_rate": 1.405383030373867e-06, "loss": 1.0486, "step": 9894 }, { "epoch": 0.7627967930928153, "grad_norm": 3.855316162109375, "learning_rate": 1.4045153600956257e-06, "loss": 1.0069, "step": 9895 }, { "epoch": 0.7628738822078323, "grad_norm": 4.0627827644348145, "learning_rate": 1.4036479139772309e-06, "loss": 0.9452, "step": 9896 }, { "epoch": 0.7629509713228492, "grad_norm": 3.806206464767456, "learning_rate": 1.402780692072762e-06, "loss": 0.8991, "step": 9897 }, { "epoch": 0.7630280604378662, "grad_norm": 4.070652484893799, "learning_rate": 1.4019136944362882e-06, "loss": 0.9424, "step": 9898 }, { "epoch": 0.7631051495528831, "grad_norm": 3.583237409591675, "learning_rate": 1.401046921121859e-06, "loss": 0.9428, "step": 9899 }, { "epoch": 0.7631822386679001, "grad_norm": 3.9483799934387207, "learning_rate": 1.400180372183515e-06, "loss": 1.017, "step": 9900 }, { "epoch": 0.763259327782917, "grad_norm": 3.897564172744751, "learning_rate": 1.3993140476752808e-06, "loss": 0.8972, "step": 9901 }, { "epoch": 0.763336416897934, "grad_norm": 3.622753620147705, "learning_rate": 1.3984479476511676e-06, "loss": 0.925, "step": 9902 }, { "epoch": 0.763413506012951, "grad_norm": 3.872459888458252, "learning_rate": 1.3975820721651718e-06, "loss": 0.9007, "step": 9903 }, { "epoch": 0.7634905951279679, "grad_norm": 3.8231959342956543, "learning_rate": 1.3967164212712774e-06, "loss": 0.8777, "step": 9904 }, { "epoch": 0.7635676842429849, "grad_norm": 3.6052405834198, "learning_rate": 1.3958509950234516e-06, "loss": 0.926, "step": 9905 }, { "epoch": 0.7636447733580018, "grad_norm": 4.091972351074219, "learning_rate": 1.3949857934756495e-06, "loss": 0.9525, "step": 9906 }, { "epoch": 0.7637218624730188, "grad_norm": 3.3216168880462646, "learning_rate": 1.394120816681812e-06, "loss": 0.8179, "step": 9907 }, { "epoch": 0.7637989515880358, "grad_norm": 3.8997809886932373, "learning_rate": 1.3932560646958665e-06, "loss": 0.9877, "step": 9908 }, { "epoch": 0.7638760407030527, "grad_norm": 3.5005760192871094, "learning_rate": 1.3923915375717272e-06, "loss": 0.9585, "step": 9909 }, { "epoch": 0.7639531298180697, "grad_norm": 3.4733173847198486, "learning_rate": 1.3915272353632896e-06, "loss": 0.873, "step": 9910 }, { "epoch": 0.7640302189330866, "grad_norm": 3.862825393676758, "learning_rate": 1.39066315812444e-06, "loss": 0.8944, "step": 9911 }, { "epoch": 0.7641073080481036, "grad_norm": 3.6997947692871094, "learning_rate": 1.3897993059090492e-06, "loss": 0.9553, "step": 9912 }, { "epoch": 0.7641843971631206, 
"grad_norm": 4.415987491607666, "learning_rate": 1.388935678770974e-06, "loss": 0.8898, "step": 9913 }, { "epoch": 0.7642614862781375, "grad_norm": 3.8351826667785645, "learning_rate": 1.3880722767640575e-06, "loss": 1.0199, "step": 9914 }, { "epoch": 0.7643385753931545, "grad_norm": 3.8005855083465576, "learning_rate": 1.387209099942129e-06, "loss": 0.8509, "step": 9915 }, { "epoch": 0.7644156645081714, "grad_norm": 3.49656343460083, "learning_rate": 1.3863461483590008e-06, "loss": 0.8576, "step": 9916 }, { "epoch": 0.7644927536231884, "grad_norm": 3.7017486095428467, "learning_rate": 1.3854834220684743e-06, "loss": 0.976, "step": 9917 }, { "epoch": 0.7645698427382054, "grad_norm": 3.82146954536438, "learning_rate": 1.3846209211243366e-06, "loss": 0.8766, "step": 9918 }, { "epoch": 0.7646469318532223, "grad_norm": 3.6849117279052734, "learning_rate": 1.3837586455803599e-06, "loss": 0.9259, "step": 9919 }, { "epoch": 0.7647240209682393, "grad_norm": 3.7132599353790283, "learning_rate": 1.382896595490304e-06, "loss": 0.853, "step": 9920 }, { "epoch": 0.7648011100832562, "grad_norm": 4.089300632476807, "learning_rate": 1.3820347709079103e-06, "loss": 0.9161, "step": 9921 }, { "epoch": 0.7648781991982732, "grad_norm": 3.6763312816619873, "learning_rate": 1.3811731718869108e-06, "loss": 0.9985, "step": 9922 }, { "epoch": 0.7649552883132902, "grad_norm": 3.901153326034546, "learning_rate": 1.3803117984810221e-06, "loss": 0.9706, "step": 9923 }, { "epoch": 0.7650323774283071, "grad_norm": 3.6680846214294434, "learning_rate": 1.3794506507439454e-06, "loss": 0.9672, "step": 9924 }, { "epoch": 0.7651094665433241, "grad_norm": 3.704328775405884, "learning_rate": 1.3785897287293693e-06, "loss": 0.9395, "step": 9925 }, { "epoch": 0.765186555658341, "grad_norm": 3.6774888038635254, "learning_rate": 1.3777290324909698e-06, "loss": 1.0224, "step": 9926 }, { "epoch": 0.765263644773358, "grad_norm": 3.7571513652801514, "learning_rate": 1.376868562082403e-06, "loss": 1.0518, "step": 9927 }, { "epoch": 0.765340733888375, "grad_norm": 3.6580474376678467, "learning_rate": 1.3760083175573168e-06, "loss": 0.8153, "step": 9928 }, { "epoch": 0.7654178230033919, "grad_norm": 4.089507579803467, "learning_rate": 1.3751482989693433e-06, "loss": 0.9439, "step": 9929 }, { "epoch": 0.7654949121184089, "grad_norm": 3.566601037979126, "learning_rate": 1.374288506372099e-06, "loss": 0.8285, "step": 9930 }, { "epoch": 0.7655720012334258, "grad_norm": 3.4016520977020264, "learning_rate": 1.3734289398191902e-06, "loss": 0.8906, "step": 9931 }, { "epoch": 0.7656490903484428, "grad_norm": 3.8981709480285645, "learning_rate": 1.372569599364203e-06, "loss": 1.0273, "step": 9932 }, { "epoch": 0.7657261794634598, "grad_norm": 4.178238868713379, "learning_rate": 1.3717104850607144e-06, "loss": 1.0948, "step": 9933 }, { "epoch": 0.7658032685784767, "grad_norm": 3.904507875442505, "learning_rate": 1.3708515969622854e-06, "loss": 1.0076, "step": 9934 }, { "epoch": 0.7658803576934937, "grad_norm": 3.62880802154541, "learning_rate": 1.369992935122464e-06, "loss": 0.9251, "step": 9935 }, { "epoch": 0.7659574468085106, "grad_norm": 3.961493968963623, "learning_rate": 1.369134499594782e-06, "loss": 0.9443, "step": 9936 }, { "epoch": 0.7660345359235275, "grad_norm": 3.5568950176239014, "learning_rate": 1.3682762904327613e-06, "loss": 0.9349, "step": 9937 }, { "epoch": 0.7661116250385446, "grad_norm": 4.020711898803711, "learning_rate": 1.3674183076899016e-06, "loss": 0.8839, "step": 9938 }, { "epoch": 0.7661887141535615, "grad_norm": 
4.319362163543701, "learning_rate": 1.3665605514196984e-06, "loss": 1.0175, "step": 9939 }, { "epoch": 0.7662658032685785, "grad_norm": 3.667689085006714, "learning_rate": 1.3657030216756263e-06, "loss": 0.8792, "step": 9940 }, { "epoch": 0.7663428923835954, "grad_norm": 3.632988214492798, "learning_rate": 1.3648457185111502e-06, "loss": 0.8438, "step": 9941 }, { "epoch": 0.7664199814986123, "grad_norm": 3.730689525604248, "learning_rate": 1.363988641979715e-06, "loss": 0.9797, "step": 9942 }, { "epoch": 0.7664970706136294, "grad_norm": 3.5583460330963135, "learning_rate": 1.3631317921347564e-06, "loss": 0.7986, "step": 9943 }, { "epoch": 0.7665741597286463, "grad_norm": 3.7527616024017334, "learning_rate": 1.3622751690296947e-06, "loss": 0.9162, "step": 9944 }, { "epoch": 0.7666512488436633, "grad_norm": 3.690645694732666, "learning_rate": 1.3614187727179368e-06, "loss": 0.9612, "step": 9945 }, { "epoch": 0.7667283379586802, "grad_norm": 3.8504841327667236, "learning_rate": 1.3605626032528746e-06, "loss": 0.8196, "step": 9946 }, { "epoch": 0.7668054270736971, "grad_norm": 3.3687868118286133, "learning_rate": 1.3597066606878834e-06, "loss": 0.8102, "step": 9947 }, { "epoch": 0.7668825161887142, "grad_norm": 3.635887622833252, "learning_rate": 1.3588509450763281e-06, "loss": 0.8814, "step": 9948 }, { "epoch": 0.7669596053037311, "grad_norm": 3.819847822189331, "learning_rate": 1.3579954564715587e-06, "loss": 0.8737, "step": 9949 }, { "epoch": 0.7670366944187481, "grad_norm": 3.8392279148101807, "learning_rate": 1.3571401949269103e-06, "loss": 0.9532, "step": 9950 }, { "epoch": 0.767113783533765, "grad_norm": 3.4588663578033447, "learning_rate": 1.356285160495704e-06, "loss": 0.7837, "step": 9951 }, { "epoch": 0.7671908726487819, "grad_norm": 3.9829211235046387, "learning_rate": 1.3554303532312475e-06, "loss": 0.9798, "step": 9952 }, { "epoch": 0.767267961763799, "grad_norm": 3.6021299362182617, "learning_rate": 1.354575773186832e-06, "loss": 0.8786, "step": 9953 }, { "epoch": 0.7673450508788159, "grad_norm": 3.810025453567505, "learning_rate": 1.353721420415736e-06, "loss": 0.9356, "step": 9954 }, { "epoch": 0.7674221399938329, "grad_norm": 3.6363253593444824, "learning_rate": 1.3528672949712257e-06, "loss": 0.9352, "step": 9955 }, { "epoch": 0.7674992291088498, "grad_norm": 3.648545980453491, "learning_rate": 1.3520133969065502e-06, "loss": 0.8348, "step": 9956 }, { "epoch": 0.7675763182238667, "grad_norm": 3.741356611251831, "learning_rate": 1.3511597262749476e-06, "loss": 0.8785, "step": 9957 }, { "epoch": 0.7676534073388838, "grad_norm": 3.7994656562805176, "learning_rate": 1.3503062831296372e-06, "loss": 0.9241, "step": 9958 }, { "epoch": 0.7677304964539007, "grad_norm": 3.677051544189453, "learning_rate": 1.349453067523827e-06, "loss": 0.9047, "step": 9959 }, { "epoch": 0.7678075855689177, "grad_norm": 3.7097387313842773, "learning_rate": 1.3486000795107118e-06, "loss": 0.9048, "step": 9960 }, { "epoch": 0.7678846746839346, "grad_norm": 3.4744412899017334, "learning_rate": 1.3477473191434703e-06, "loss": 0.939, "step": 9961 }, { "epoch": 0.7679617637989515, "grad_norm": 4.095146179199219, "learning_rate": 1.346894786475268e-06, "loss": 1.0014, "step": 9962 }, { "epoch": 0.7680388529139686, "grad_norm": 3.7162740230560303, "learning_rate": 1.3460424815592577e-06, "loss": 0.8576, "step": 9963 }, { "epoch": 0.7681159420289855, "grad_norm": 3.45137095451355, "learning_rate": 1.3451904044485725e-06, "loss": 0.8469, "step": 9964 }, { "epoch": 0.7681930311440025, "grad_norm": 
3.4185662269592285, "learning_rate": 1.3443385551963373e-06, "loss": 0.741, "step": 9965 }, { "epoch": 0.7682701202590194, "grad_norm": 3.571277618408203, "learning_rate": 1.3434869338556594e-06, "loss": 0.9274, "step": 9966 }, { "epoch": 0.7683472093740363, "grad_norm": 3.5546271800994873, "learning_rate": 1.3426355404796337e-06, "loss": 0.8564, "step": 9967 }, { "epoch": 0.7684242984890534, "grad_norm": 3.800027370452881, "learning_rate": 1.341784375121342e-06, "loss": 0.9594, "step": 9968 }, { "epoch": 0.7685013876040703, "grad_norm": 3.928795337677002, "learning_rate": 1.3409334378338461e-06, "loss": 0.9045, "step": 9969 }, { "epoch": 0.7685784767190873, "grad_norm": 3.6231064796447754, "learning_rate": 1.3400827286702001e-06, "loss": 0.9071, "step": 9970 }, { "epoch": 0.7686555658341042, "grad_norm": 3.7267343997955322, "learning_rate": 1.3392322476834402e-06, "loss": 0.9177, "step": 9971 }, { "epoch": 0.7687326549491211, "grad_norm": 3.9702486991882324, "learning_rate": 1.3383819949265908e-06, "loss": 0.9202, "step": 9972 }, { "epoch": 0.7688097440641382, "grad_norm": 3.364682912826538, "learning_rate": 1.3375319704526595e-06, "loss": 0.8446, "step": 9973 }, { "epoch": 0.7688868331791551, "grad_norm": 4.229642868041992, "learning_rate": 1.336682174314643e-06, "loss": 1.0238, "step": 9974 }, { "epoch": 0.7689639222941721, "grad_norm": 3.9624364376068115, "learning_rate": 1.3358326065655187e-06, "loss": 1.0482, "step": 9975 }, { "epoch": 0.769041011409189, "grad_norm": 3.632380485534668, "learning_rate": 1.334983267258254e-06, "loss": 0.8454, "step": 9976 }, { "epoch": 0.7691181005242059, "grad_norm": 3.768838882446289, "learning_rate": 1.334134156445801e-06, "loss": 0.8613, "step": 9977 }, { "epoch": 0.769195189639223, "grad_norm": 3.4363794326782227, "learning_rate": 1.3332852741810975e-06, "loss": 0.8998, "step": 9978 }, { "epoch": 0.7692722787542399, "grad_norm": 3.6031389236450195, "learning_rate": 1.332436620517068e-06, "loss": 0.8144, "step": 9979 }, { "epoch": 0.7693493678692569, "grad_norm": 3.714672327041626, "learning_rate": 1.331588195506619e-06, "loss": 1.0322, "step": 9980 }, { "epoch": 0.7694264569842738, "grad_norm": 3.482741117477417, "learning_rate": 1.330739999202647e-06, "loss": 0.8334, "step": 9981 }, { "epoch": 0.7695035460992907, "grad_norm": 4.036420822143555, "learning_rate": 1.3298920316580304e-06, "loss": 0.9349, "step": 9982 }, { "epoch": 0.7695806352143078, "grad_norm": 4.182904243469238, "learning_rate": 1.3290442929256415e-06, "loss": 0.8901, "step": 9983 }, { "epoch": 0.7696577243293247, "grad_norm": 3.8596582412719727, "learning_rate": 1.3281967830583264e-06, "loss": 0.9771, "step": 9984 }, { "epoch": 0.7697348134443417, "grad_norm": 3.6666831970214844, "learning_rate": 1.3273495021089255e-06, "loss": 0.8917, "step": 9985 }, { "epoch": 0.7698119025593586, "grad_norm": 3.531444787979126, "learning_rate": 1.326502450130262e-06, "loss": 0.8601, "step": 9986 }, { "epoch": 0.7698889916743756, "grad_norm": 4.0722551345825195, "learning_rate": 1.3256556271751454e-06, "loss": 1.0251, "step": 9987 }, { "epoch": 0.7699660807893925, "grad_norm": 3.8301305770874023, "learning_rate": 1.3248090332963697e-06, "loss": 0.9047, "step": 9988 }, { "epoch": 0.7700431699044095, "grad_norm": 3.6400532722473145, "learning_rate": 1.323962668546719e-06, "loss": 0.995, "step": 9989 }, { "epoch": 0.7701202590194265, "grad_norm": 4.178936958312988, "learning_rate": 1.3231165329789546e-06, "loss": 0.9048, "step": 9990 }, { "epoch": 0.7701973481344434, "grad_norm": 4.014671325683594, 
"learning_rate": 1.3222706266458323e-06, "loss": 0.9091, "step": 9991 }, { "epoch": 0.7702744372494604, "grad_norm": 4.306642055511475, "learning_rate": 1.3214249496000887e-06, "loss": 1.0158, "step": 9992 }, { "epoch": 0.7703515263644773, "grad_norm": 3.796880006790161, "learning_rate": 1.3205795018944473e-06, "loss": 0.9725, "step": 9993 }, { "epoch": 0.7704286154794943, "grad_norm": 3.244234800338745, "learning_rate": 1.3197342835816196e-06, "loss": 0.7616, "step": 9994 }, { "epoch": 0.7705057045945113, "grad_norm": 3.749753713607788, "learning_rate": 1.3188892947142973e-06, "loss": 0.825, "step": 9995 }, { "epoch": 0.7705827937095282, "grad_norm": 3.7566635608673096, "learning_rate": 1.3180445353451621e-06, "loss": 0.91, "step": 9996 }, { "epoch": 0.7706598828245452, "grad_norm": 3.844532012939453, "learning_rate": 1.3172000055268814e-06, "loss": 0.96, "step": 9997 }, { "epoch": 0.7707369719395621, "grad_norm": 3.6308090686798096, "learning_rate": 1.3163557053121062e-06, "loss": 0.9925, "step": 9998 }, { "epoch": 0.7708140610545791, "grad_norm": 3.9852662086486816, "learning_rate": 1.3155116347534746e-06, "loss": 1.0156, "step": 9999 }, { "epoch": 0.7708911501695961, "grad_norm": 3.719912052154541, "learning_rate": 1.3146677939036118e-06, "loss": 0.9678, "step": 10000 }, { "epoch": 0.770968239284613, "grad_norm": 3.3894362449645996, "learning_rate": 1.3138241828151238e-06, "loss": 0.8484, "step": 10001 }, { "epoch": 0.77104532839963, "grad_norm": 3.582899808883667, "learning_rate": 1.3129808015406064e-06, "loss": 0.7916, "step": 10002 }, { "epoch": 0.7711224175146469, "grad_norm": 3.8180840015411377, "learning_rate": 1.312137650132641e-06, "loss": 0.9036, "step": 10003 }, { "epoch": 0.7711995066296639, "grad_norm": 3.5141336917877197, "learning_rate": 1.3112947286437927e-06, "loss": 0.8419, "step": 10004 }, { "epoch": 0.7712765957446809, "grad_norm": 3.5062694549560547, "learning_rate": 1.3104520371266155e-06, "loss": 0.8971, "step": 10005 }, { "epoch": 0.7713536848596978, "grad_norm": 3.4877374172210693, "learning_rate": 1.3096095756336442e-06, "loss": 0.9091, "step": 10006 }, { "epoch": 0.7714307739747148, "grad_norm": 4.043210029602051, "learning_rate": 1.308767344217402e-06, "loss": 1.0359, "step": 10007 }, { "epoch": 0.7715078630897317, "grad_norm": 4.2239990234375, "learning_rate": 1.307925342930399e-06, "loss": 0.9755, "step": 10008 }, { "epoch": 0.7715849522047487, "grad_norm": 3.9452037811279297, "learning_rate": 1.3070835718251284e-06, "loss": 0.963, "step": 10009 }, { "epoch": 0.7716620413197657, "grad_norm": 3.688035249710083, "learning_rate": 1.306242030954072e-06, "loss": 0.9624, "step": 10010 }, { "epoch": 0.7717391304347826, "grad_norm": 3.4075801372528076, "learning_rate": 1.3054007203696955e-06, "loss": 0.8649, "step": 10011 }, { "epoch": 0.7718162195497996, "grad_norm": 3.897404909133911, "learning_rate": 1.3045596401244477e-06, "loss": 0.8981, "step": 10012 }, { "epoch": 0.7718933086648165, "grad_norm": 3.891972303390503, "learning_rate": 1.303718790270767e-06, "loss": 0.9878, "step": 10013 }, { "epoch": 0.7719703977798335, "grad_norm": 3.599642515182495, "learning_rate": 1.3028781708610766e-06, "loss": 0.8243, "step": 10014 }, { "epoch": 0.7720474868948505, "grad_norm": 3.8693175315856934, "learning_rate": 1.3020377819477843e-06, "loss": 0.9088, "step": 10015 }, { "epoch": 0.7721245760098674, "grad_norm": 3.6031887531280518, "learning_rate": 1.3011976235832852e-06, "loss": 0.8812, "step": 10016 }, { "epoch": 0.7722016651248844, "grad_norm": 3.770428419113159, 
"learning_rate": 1.3003576958199565e-06, "loss": 1.026, "step": 10017 }, { "epoch": 0.7722787542399013, "grad_norm": 3.2302563190460205, "learning_rate": 1.2995179987101648e-06, "loss": 0.7229, "step": 10018 }, { "epoch": 0.7723558433549182, "grad_norm": 3.593198537826538, "learning_rate": 1.29867853230626e-06, "loss": 0.7879, "step": 10019 }, { "epoch": 0.7724329324699353, "grad_norm": 4.028049945831299, "learning_rate": 1.297839296660579e-06, "loss": 0.8327, "step": 10020 }, { "epoch": 0.7725100215849522, "grad_norm": 3.5654919147491455, "learning_rate": 1.2970002918254443e-06, "loss": 0.9508, "step": 10021 }, { "epoch": 0.7725871106999692, "grad_norm": 3.5768983364105225, "learning_rate": 1.2961615178531644e-06, "loss": 0.8049, "step": 10022 }, { "epoch": 0.7726641998149861, "grad_norm": 3.564749240875244, "learning_rate": 1.29532297479603e-06, "loss": 0.8298, "step": 10023 }, { "epoch": 0.772741288930003, "grad_norm": 3.7964117527008057, "learning_rate": 1.2944846627063208e-06, "loss": 0.8518, "step": 10024 }, { "epoch": 0.7728183780450201, "grad_norm": 3.8929343223571777, "learning_rate": 1.2936465816363014e-06, "loss": 0.8663, "step": 10025 }, { "epoch": 0.772895467160037, "grad_norm": 4.378154277801514, "learning_rate": 1.2928087316382225e-06, "loss": 0.9825, "step": 10026 }, { "epoch": 0.772972556275054, "grad_norm": 3.699422836303711, "learning_rate": 1.2919711127643186e-06, "loss": 0.939, "step": 10027 }, { "epoch": 0.7730496453900709, "grad_norm": 3.876960277557373, "learning_rate": 1.2911337250668115e-06, "loss": 0.9534, "step": 10028 }, { "epoch": 0.7731267345050878, "grad_norm": 4.108030319213867, "learning_rate": 1.290296568597908e-06, "loss": 0.9403, "step": 10029 }, { "epoch": 0.7732038236201049, "grad_norm": 3.5450022220611572, "learning_rate": 1.2894596434098006e-06, "loss": 0.9481, "step": 10030 }, { "epoch": 0.7732809127351218, "grad_norm": 3.4901676177978516, "learning_rate": 1.2886229495546687e-06, "loss": 0.8106, "step": 10031 }, { "epoch": 0.7733580018501388, "grad_norm": 4.202146053314209, "learning_rate": 1.2877864870846724e-06, "loss": 0.8711, "step": 10032 }, { "epoch": 0.7734350909651557, "grad_norm": 3.778503656387329, "learning_rate": 1.2869502560519626e-06, "loss": 1.053, "step": 10033 }, { "epoch": 0.7735121800801726, "grad_norm": 3.5146751403808594, "learning_rate": 1.2861142565086737e-06, "loss": 0.7681, "step": 10034 }, { "epoch": 0.7735892691951897, "grad_norm": 3.5477395057678223, "learning_rate": 1.2852784885069265e-06, "loss": 0.9164, "step": 10035 }, { "epoch": 0.7736663583102066, "grad_norm": 3.6575543880462646, "learning_rate": 1.284442952098826e-06, "loss": 0.8941, "step": 10036 }, { "epoch": 0.7737434474252236, "grad_norm": 3.6682982444763184, "learning_rate": 1.2836076473364662e-06, "loss": 0.824, "step": 10037 }, { "epoch": 0.7738205365402405, "grad_norm": 3.7592432498931885, "learning_rate": 1.2827725742719205e-06, "loss": 0.9658, "step": 10038 }, { "epoch": 0.7738976256552574, "grad_norm": 3.7127466201782227, "learning_rate": 1.2819377329572525e-06, "loss": 0.8969, "step": 10039 }, { "epoch": 0.7739747147702745, "grad_norm": 3.9338138103485107, "learning_rate": 1.2811031234445103e-06, "loss": 1.0264, "step": 10040 }, { "epoch": 0.7740518038852914, "grad_norm": 3.961344003677368, "learning_rate": 1.2802687457857277e-06, "loss": 0.9453, "step": 10041 }, { "epoch": 0.7741288930003084, "grad_norm": 3.9601597785949707, "learning_rate": 1.2794346000329256e-06, "loss": 0.9532, "step": 10042 }, { "epoch": 0.7742059821153253, "grad_norm": 
3.8072171211242676, "learning_rate": 1.278600686238105e-06, "loss": 1.0072, "step": 10043 }, { "epoch": 0.7742830712303422, "grad_norm": 3.795827865600586, "learning_rate": 1.2777670044532586e-06, "loss": 0.875, "step": 10044 }, { "epoch": 0.7743601603453593, "grad_norm": 3.6865553855895996, "learning_rate": 1.2769335547303613e-06, "loss": 0.9439, "step": 10045 }, { "epoch": 0.7744372494603762, "grad_norm": 3.5264663696289062, "learning_rate": 1.2761003371213743e-06, "loss": 0.942, "step": 10046 }, { "epoch": 0.7745143385753932, "grad_norm": 3.729469060897827, "learning_rate": 1.2752673516782448e-06, "loss": 0.9807, "step": 10047 }, { "epoch": 0.7745914276904101, "grad_norm": 3.5642600059509277, "learning_rate": 1.2744345984529066e-06, "loss": 0.9168, "step": 10048 }, { "epoch": 0.774668516805427, "grad_norm": 3.684962749481201, "learning_rate": 1.2736020774972746e-06, "loss": 0.9065, "step": 10049 }, { "epoch": 0.7747456059204441, "grad_norm": 3.697866916656494, "learning_rate": 1.2727697888632534e-06, "loss": 0.8338, "step": 10050 }, { "epoch": 0.774822695035461, "grad_norm": 4.074203968048096, "learning_rate": 1.271937732602732e-06, "loss": 0.868, "step": 10051 }, { "epoch": 0.774899784150478, "grad_norm": 3.6111466884613037, "learning_rate": 1.2711059087675853e-06, "loss": 0.9527, "step": 10052 }, { "epoch": 0.7749768732654949, "grad_norm": 3.5815255641937256, "learning_rate": 1.2702743174096737e-06, "loss": 0.9145, "step": 10053 }, { "epoch": 0.7750539623805118, "grad_norm": 3.476964235305786, "learning_rate": 1.2694429585808404e-06, "loss": 0.8759, "step": 10054 }, { "epoch": 0.7751310514955289, "grad_norm": 3.8654966354370117, "learning_rate": 1.2686118323329178e-06, "loss": 0.9899, "step": 10055 }, { "epoch": 0.7752081406105458, "grad_norm": 3.7267918586730957, "learning_rate": 1.267780938717722e-06, "loss": 0.9701, "step": 10056 }, { "epoch": 0.7752852297255628, "grad_norm": 4.117705821990967, "learning_rate": 1.2669502777870546e-06, "loss": 0.8462, "step": 10057 }, { "epoch": 0.7753623188405797, "grad_norm": 3.576138496398926, "learning_rate": 1.266119849592704e-06, "loss": 0.9018, "step": 10058 }, { "epoch": 0.7754394079555966, "grad_norm": 5.330030918121338, "learning_rate": 1.2652896541864435e-06, "loss": 0.8455, "step": 10059 }, { "epoch": 0.7755164970706137, "grad_norm": 3.8612890243530273, "learning_rate": 1.264459691620029e-06, "loss": 0.9188, "step": 10060 }, { "epoch": 0.7755935861856306, "grad_norm": 4.6686787605285645, "learning_rate": 1.2636299619452064e-06, "loss": 0.9712, "step": 10061 }, { "epoch": 0.7756706753006476, "grad_norm": 4.158293724060059, "learning_rate": 1.2628004652137044e-06, "loss": 0.8994, "step": 10062 }, { "epoch": 0.7757477644156645, "grad_norm": 3.727849245071411, "learning_rate": 1.2619712014772378e-06, "loss": 0.8811, "step": 10063 }, { "epoch": 0.7758248535306814, "grad_norm": 3.6993374824523926, "learning_rate": 1.2611421707875083e-06, "loss": 0.848, "step": 10064 }, { "epoch": 0.7759019426456985, "grad_norm": 3.298645496368408, "learning_rate": 1.2603133731961992e-06, "loss": 0.8555, "step": 10065 }, { "epoch": 0.7759790317607154, "grad_norm": 3.6150310039520264, "learning_rate": 1.2594848087549826e-06, "loss": 0.9416, "step": 10066 }, { "epoch": 0.7760561208757324, "grad_norm": 3.694746732711792, "learning_rate": 1.2586564775155158e-06, "loss": 0.8568, "step": 10067 }, { "epoch": 0.7761332099907493, "grad_norm": 4.369163990020752, "learning_rate": 1.257828379529441e-06, "loss": 1.0183, "step": 10068 }, { "epoch": 0.7762102991057662, 
"grad_norm": 3.4145572185516357, "learning_rate": 1.257000514848385e-06, "loss": 0.874, "step": 10069 }, { "epoch": 0.7762873882207832, "grad_norm": 4.185209274291992, "learning_rate": 1.2561728835239633e-06, "loss": 0.8839, "step": 10070 }, { "epoch": 0.7763644773358002, "grad_norm": 3.814661979675293, "learning_rate": 1.255345485607769e-06, "loss": 0.9982, "step": 10071 }, { "epoch": 0.7764415664508172, "grad_norm": 3.9218826293945312, "learning_rate": 1.2545183211513918e-06, "loss": 0.933, "step": 10072 }, { "epoch": 0.7765186555658341, "grad_norm": 4.283771514892578, "learning_rate": 1.2536913902063985e-06, "loss": 0.9881, "step": 10073 }, { "epoch": 0.776595744680851, "grad_norm": 3.5074286460876465, "learning_rate": 1.252864692824346e-06, "loss": 0.9492, "step": 10074 }, { "epoch": 0.776672833795868, "grad_norm": 3.372987747192383, "learning_rate": 1.2520382290567717e-06, "loss": 0.8843, "step": 10075 }, { "epoch": 0.776749922910885, "grad_norm": 3.437117099761963, "learning_rate": 1.2512119989552023e-06, "loss": 0.7762, "step": 10076 }, { "epoch": 0.776827012025902, "grad_norm": 3.731052875518799, "learning_rate": 1.2503860025711494e-06, "loss": 1.0258, "step": 10077 }, { "epoch": 0.7769041011409189, "grad_norm": 3.422943592071533, "learning_rate": 1.2495602399561096e-06, "loss": 0.7939, "step": 10078 }, { "epoch": 0.7769811902559358, "grad_norm": 3.4586517810821533, "learning_rate": 1.248734711161566e-06, "loss": 0.7924, "step": 10079 }, { "epoch": 0.7770582793709528, "grad_norm": 3.3821561336517334, "learning_rate": 1.2479094162389838e-06, "loss": 0.8364, "step": 10080 }, { "epoch": 0.7771353684859698, "grad_norm": 4.26823091506958, "learning_rate": 1.2470843552398166e-06, "loss": 0.9628, "step": 10081 }, { "epoch": 0.7772124576009868, "grad_norm": 3.7963852882385254, "learning_rate": 1.2462595282155032e-06, "loss": 0.9598, "step": 10082 }, { "epoch": 0.7772895467160037, "grad_norm": 3.440056800842285, "learning_rate": 1.2454349352174666e-06, "loss": 0.887, "step": 10083 }, { "epoch": 0.7773666358310206, "grad_norm": 3.769310235977173, "learning_rate": 1.2446105762971167e-06, "loss": 0.9397, "step": 10084 }, { "epoch": 0.7774437249460376, "grad_norm": 3.7799243927001953, "learning_rate": 1.2437864515058495e-06, "loss": 0.9923, "step": 10085 }, { "epoch": 0.7775208140610546, "grad_norm": 3.681706428527832, "learning_rate": 1.2429625608950412e-06, "loss": 0.9673, "step": 10086 }, { "epoch": 0.7775979031760716, "grad_norm": 3.8595070838928223, "learning_rate": 1.2421389045160593e-06, "loss": 0.8813, "step": 10087 }, { "epoch": 0.7776749922910885, "grad_norm": 4.8416361808776855, "learning_rate": 1.2413154824202545e-06, "loss": 0.9058, "step": 10088 }, { "epoch": 0.7777520814061054, "grad_norm": 3.877955198287964, "learning_rate": 1.240492294658962e-06, "loss": 0.9176, "step": 10089 }, { "epoch": 0.7778291705211224, "grad_norm": 3.9161739349365234, "learning_rate": 1.239669341283506e-06, "loss": 0.9904, "step": 10090 }, { "epoch": 0.7779062596361394, "grad_norm": 3.6513936519622803, "learning_rate": 1.2388466223451895e-06, "loss": 0.8185, "step": 10091 }, { "epoch": 0.7779833487511564, "grad_norm": 3.919912099838257, "learning_rate": 1.2380241378953067e-06, "loss": 0.8561, "step": 10092 }, { "epoch": 0.7780604378661733, "grad_norm": 3.2892303466796875, "learning_rate": 1.237201887985135e-06, "loss": 0.902, "step": 10093 }, { "epoch": 0.7781375269811902, "grad_norm": 3.583991527557373, "learning_rate": 1.2363798726659377e-06, "loss": 0.8996, "step": 10094 }, { "epoch": 
0.7782146160962072, "grad_norm": 3.9452333450317383, "learning_rate": 1.235558091988963e-06, "loss": 1.0222, "step": 10095 }, { "epoch": 0.7782917052112241, "grad_norm": 3.6943013668060303, "learning_rate": 1.234736546005446e-06, "loss": 0.9208, "step": 10096 }, { "epoch": 0.7783687943262412, "grad_norm": 3.9956469535827637, "learning_rate": 1.2339152347666033e-06, "loss": 1.0566, "step": 10097 }, { "epoch": 0.7784458834412581, "grad_norm": 3.802483558654785, "learning_rate": 1.2330941583236406e-06, "loss": 0.9223, "step": 10098 }, { "epoch": 0.778522972556275, "grad_norm": 3.4962236881256104, "learning_rate": 1.2322733167277479e-06, "loss": 0.8389, "step": 10099 }, { "epoch": 0.778600061671292, "grad_norm": 3.899533271789551, "learning_rate": 1.2314527100301005e-06, "loss": 1.0017, "step": 10100 }, { "epoch": 0.778677150786309, "grad_norm": 3.7576115131378174, "learning_rate": 1.2306323382818596e-06, "loss": 0.9071, "step": 10101 }, { "epoch": 0.778754239901326, "grad_norm": 3.894066333770752, "learning_rate": 1.2298122015341696e-06, "loss": 1.0241, "step": 10102 }, { "epoch": 0.7788313290163429, "grad_norm": 3.69673490524292, "learning_rate": 1.2289922998381625e-06, "loss": 0.9332, "step": 10103 }, { "epoch": 0.7789084181313598, "grad_norm": 4.03447151184082, "learning_rate": 1.2281726332449544e-06, "loss": 1.017, "step": 10104 }, { "epoch": 0.7789855072463768, "grad_norm": 3.7014174461364746, "learning_rate": 1.2273532018056482e-06, "loss": 0.9177, "step": 10105 }, { "epoch": 0.7790625963613937, "grad_norm": 4.122037410736084, "learning_rate": 1.226534005571331e-06, "loss": 0.918, "step": 10106 }, { "epoch": 0.7791396854764108, "grad_norm": 3.817849636077881, "learning_rate": 1.2257150445930765e-06, "loss": 0.8933, "step": 10107 }, { "epoch": 0.7792167745914277, "grad_norm": 3.191657304763794, "learning_rate": 1.2248963189219398e-06, "loss": 0.7782, "step": 10108 }, { "epoch": 0.7792938637064446, "grad_norm": 3.7507643699645996, "learning_rate": 1.224077828608966e-06, "loss": 0.9169, "step": 10109 }, { "epoch": 0.7793709528214616, "grad_norm": 3.5642547607421875, "learning_rate": 1.223259573705184e-06, "loss": 0.9119, "step": 10110 }, { "epoch": 0.7794480419364785, "grad_norm": 3.764009952545166, "learning_rate": 1.2224415542616069e-06, "loss": 0.8835, "step": 10111 }, { "epoch": 0.7795251310514956, "grad_norm": 3.5799670219421387, "learning_rate": 1.2216237703292361e-06, "loss": 0.9183, "step": 10112 }, { "epoch": 0.7796022201665125, "grad_norm": 3.8919787406921387, "learning_rate": 1.2208062219590527e-06, "loss": 0.9795, "step": 10113 }, { "epoch": 0.7796793092815294, "grad_norm": 3.9707539081573486, "learning_rate": 1.2199889092020289e-06, "loss": 1.004, "step": 10114 }, { "epoch": 0.7797563983965464, "grad_norm": 3.72212815284729, "learning_rate": 1.2191718321091178e-06, "loss": 0.9145, "step": 10115 }, { "epoch": 0.7798334875115633, "grad_norm": 3.555389642715454, "learning_rate": 1.2183549907312624e-06, "loss": 0.8632, "step": 10116 }, { "epoch": 0.7799105766265804, "grad_norm": 3.579590320587158, "learning_rate": 1.2175383851193901e-06, "loss": 0.8866, "step": 10117 }, { "epoch": 0.7799876657415973, "grad_norm": 4.378693103790283, "learning_rate": 1.2167220153244075e-06, "loss": 0.8158, "step": 10118 }, { "epoch": 0.7800647548566142, "grad_norm": 3.398848056793213, "learning_rate": 1.2159058813972135e-06, "loss": 0.9214, "step": 10119 }, { "epoch": 0.7801418439716312, "grad_norm": 3.9123952388763428, "learning_rate": 1.2150899833886892e-06, "loss": 0.8256, "step": 10120 }, { 
"epoch": 0.7802189330866481, "grad_norm": 4.100687026977539, "learning_rate": 1.214274321349702e-06, "loss": 0.9506, "step": 10121 }, { "epoch": 0.7802960222016652, "grad_norm": 3.491703748703003, "learning_rate": 1.2134588953311056e-06, "loss": 0.8823, "step": 10122 }, { "epoch": 0.7803731113166821, "grad_norm": 3.760547637939453, "learning_rate": 1.2126437053837348e-06, "loss": 0.9935, "step": 10123 }, { "epoch": 0.780450200431699, "grad_norm": 3.883286237716675, "learning_rate": 1.2118287515584132e-06, "loss": 0.8716, "step": 10124 }, { "epoch": 0.780527289546716, "grad_norm": 3.697366714477539, "learning_rate": 1.2110140339059501e-06, "loss": 0.8956, "step": 10125 }, { "epoch": 0.7806043786617329, "grad_norm": 3.8138182163238525, "learning_rate": 1.2101995524771376e-06, "loss": 0.8893, "step": 10126 }, { "epoch": 0.78068146777675, "grad_norm": 3.8224334716796875, "learning_rate": 1.2093853073227574e-06, "loss": 0.9656, "step": 10127 }, { "epoch": 0.7807585568917669, "grad_norm": 3.8924286365509033, "learning_rate": 1.2085712984935693e-06, "loss": 0.8809, "step": 10128 }, { "epoch": 0.7808356460067838, "grad_norm": 3.684929847717285, "learning_rate": 1.2077575260403247e-06, "loss": 1.0676, "step": 10129 }, { "epoch": 0.7809127351218008, "grad_norm": 3.5521864891052246, "learning_rate": 1.2069439900137575e-06, "loss": 0.839, "step": 10130 }, { "epoch": 0.7809898242368177, "grad_norm": 3.7578213214874268, "learning_rate": 1.2061306904645875e-06, "loss": 0.9186, "step": 10131 }, { "epoch": 0.7810669133518348, "grad_norm": 3.4750614166259766, "learning_rate": 1.2053176274435209e-06, "loss": 0.86, "step": 10132 }, { "epoch": 0.7811440024668517, "grad_norm": 3.816967487335205, "learning_rate": 1.2045048010012477e-06, "loss": 0.9405, "step": 10133 }, { "epoch": 0.7812210915818686, "grad_norm": 4.11097526550293, "learning_rate": 1.2036922111884414e-06, "loss": 1.0416, "step": 10134 }, { "epoch": 0.7812981806968856, "grad_norm": 3.8455522060394287, "learning_rate": 1.2028798580557644e-06, "loss": 0.9472, "step": 10135 }, { "epoch": 0.7813752698119025, "grad_norm": 3.7609121799468994, "learning_rate": 1.2020677416538623e-06, "loss": 0.9292, "step": 10136 }, { "epoch": 0.7814523589269196, "grad_norm": 3.555689573287964, "learning_rate": 1.201255862033367e-06, "loss": 0.9046, "step": 10137 }, { "epoch": 0.7815294480419365, "grad_norm": 3.5667762756347656, "learning_rate": 1.2004442192448956e-06, "loss": 0.8375, "step": 10138 }, { "epoch": 0.7816065371569534, "grad_norm": 3.609121322631836, "learning_rate": 1.1996328133390472e-06, "loss": 0.9084, "step": 10139 }, { "epoch": 0.7816836262719704, "grad_norm": 3.747819423675537, "learning_rate": 1.1988216443664102e-06, "loss": 0.8834, "step": 10140 }, { "epoch": 0.7817607153869873, "grad_norm": 3.649038314819336, "learning_rate": 1.198010712377557e-06, "loss": 0.9357, "step": 10141 }, { "epoch": 0.7818378045020044, "grad_norm": 3.519754409790039, "learning_rate": 1.1972000174230452e-06, "loss": 0.8994, "step": 10142 }, { "epoch": 0.7819148936170213, "grad_norm": 3.7660558223724365, "learning_rate": 1.1963895595534164e-06, "loss": 0.8964, "step": 10143 }, { "epoch": 0.7819919827320382, "grad_norm": 3.712606430053711, "learning_rate": 1.195579338819201e-06, "loss": 0.8096, "step": 10144 }, { "epoch": 0.7820690718470552, "grad_norm": 3.9107108116149902, "learning_rate": 1.194769355270909e-06, "loss": 0.998, "step": 10145 }, { "epoch": 0.7821461609620721, "grad_norm": 3.800839424133301, "learning_rate": 1.1939596089590394e-06, "loss": 0.8565, "step": 10146 
}, { "epoch": 0.7822232500770891, "grad_norm": 3.84592342376709, "learning_rate": 1.1931500999340768e-06, "loss": 0.9408, "step": 10147 }, { "epoch": 0.7823003391921061, "grad_norm": 3.9664783477783203, "learning_rate": 1.1923408282464888e-06, "loss": 1.0395, "step": 10148 }, { "epoch": 0.782377428307123, "grad_norm": 3.7878577709198, "learning_rate": 1.1915317939467314e-06, "loss": 0.964, "step": 10149 }, { "epoch": 0.78245451742214, "grad_norm": 3.499857187271118, "learning_rate": 1.1907229970852407e-06, "loss": 0.8254, "step": 10150 }, { "epoch": 0.7825316065371569, "grad_norm": 3.702730894088745, "learning_rate": 1.1899144377124422e-06, "loss": 0.9191, "step": 10151 }, { "epoch": 0.782608695652174, "grad_norm": 3.7440195083618164, "learning_rate": 1.1891061158787459e-06, "loss": 0.8737, "step": 10152 }, { "epoch": 0.7826857847671909, "grad_norm": 3.973545551300049, "learning_rate": 1.1882980316345461e-06, "loss": 1.0105, "step": 10153 }, { "epoch": 0.7827628738822078, "grad_norm": 3.627835273742676, "learning_rate": 1.1874901850302223e-06, "loss": 0.8726, "step": 10154 }, { "epoch": 0.7828399629972248, "grad_norm": 4.042776107788086, "learning_rate": 1.1866825761161417e-06, "loss": 0.9184, "step": 10155 }, { "epoch": 0.7829170521122417, "grad_norm": 3.9499711990356445, "learning_rate": 1.1858752049426513e-06, "loss": 1.001, "step": 10156 }, { "epoch": 0.7829941412272587, "grad_norm": 3.8737709522247314, "learning_rate": 1.1850680715600883e-06, "loss": 0.9999, "step": 10157 }, { "epoch": 0.7830712303422757, "grad_norm": 3.580709934234619, "learning_rate": 1.1842611760187722e-06, "loss": 0.8882, "step": 10158 }, { "epoch": 0.7831483194572927, "grad_norm": 3.639634847640991, "learning_rate": 1.1834545183690105e-06, "loss": 0.8136, "step": 10159 }, { "epoch": 0.7832254085723096, "grad_norm": 3.59987735748291, "learning_rate": 1.1826480986610928e-06, "loss": 0.8607, "step": 10160 }, { "epoch": 0.7833024976873265, "grad_norm": 3.7638700008392334, "learning_rate": 1.1818419169452954e-06, "loss": 0.836, "step": 10161 }, { "epoch": 0.7833795868023435, "grad_norm": 3.5380563735961914, "learning_rate": 1.1810359732718795e-06, "loss": 0.847, "step": 10162 }, { "epoch": 0.7834566759173605, "grad_norm": 4.049480438232422, "learning_rate": 1.1802302676910926e-06, "loss": 1.0036, "step": 10163 }, { "epoch": 0.7835337650323775, "grad_norm": 3.6710615158081055, "learning_rate": 1.1794248002531644e-06, "loss": 0.8508, "step": 10164 }, { "epoch": 0.7836108541473944, "grad_norm": 3.6866486072540283, "learning_rate": 1.1786195710083149e-06, "loss": 0.8394, "step": 10165 }, { "epoch": 0.7836879432624113, "grad_norm": 4.017769813537598, "learning_rate": 1.1778145800067419e-06, "loss": 0.9146, "step": 10166 }, { "epoch": 0.7837650323774283, "grad_norm": 3.859618902206421, "learning_rate": 1.177009827298634e-06, "loss": 0.9376, "step": 10167 }, { "epoch": 0.7838421214924453, "grad_norm": 3.7508604526519775, "learning_rate": 1.1762053129341643e-06, "loss": 0.8773, "step": 10168 }, { "epoch": 0.7839192106074623, "grad_norm": 4.094524383544922, "learning_rate": 1.175401036963489e-06, "loss": 0.9331, "step": 10169 }, { "epoch": 0.7839962997224792, "grad_norm": 3.7034811973571777, "learning_rate": 1.1745969994367524e-06, "loss": 0.7125, "step": 10170 }, { "epoch": 0.7840733888374961, "grad_norm": 3.425140857696533, "learning_rate": 1.1737932004040792e-06, "loss": 0.8507, "step": 10171 }, { "epoch": 0.7841504779525131, "grad_norm": 3.6695098876953125, "learning_rate": 1.1729896399155831e-06, "loss": 0.96, "step": 
10172 }, { "epoch": 0.78422756706753, "grad_norm": 3.393888235092163, "learning_rate": 1.1721863180213627e-06, "loss": 0.7526, "step": 10173 }, { "epoch": 0.7843046561825471, "grad_norm": 3.838538408279419, "learning_rate": 1.171383234771501e-06, "loss": 0.9423, "step": 10174 }, { "epoch": 0.784381745297564, "grad_norm": 3.5390851497650146, "learning_rate": 1.1705803902160668e-06, "loss": 0.9341, "step": 10175 }, { "epoch": 0.7844588344125809, "grad_norm": 3.473173141479492, "learning_rate": 1.1697777844051105e-06, "loss": 0.903, "step": 10176 }, { "epoch": 0.7845359235275979, "grad_norm": 4.052275657653809, "learning_rate": 1.1689754173886725e-06, "loss": 0.9877, "step": 10177 }, { "epoch": 0.7846130126426148, "grad_norm": 3.684760808944702, "learning_rate": 1.1681732892167757e-06, "loss": 0.9197, "step": 10178 }, { "epoch": 0.7846901017576319, "grad_norm": 3.7572269439697266, "learning_rate": 1.1673713999394287e-06, "loss": 0.9295, "step": 10179 }, { "epoch": 0.7847671908726488, "grad_norm": 3.714064359664917, "learning_rate": 1.1665697496066253e-06, "loss": 0.8446, "step": 10180 }, { "epoch": 0.7848442799876657, "grad_norm": 3.450218439102173, "learning_rate": 1.1657683382683454e-06, "loss": 0.9163, "step": 10181 }, { "epoch": 0.7849213691026827, "grad_norm": 3.6925554275512695, "learning_rate": 1.1649671659745504e-06, "loss": 0.9408, "step": 10182 }, { "epoch": 0.7849984582176996, "grad_norm": 4.052911758422852, "learning_rate": 1.1641662327751907e-06, "loss": 0.9494, "step": 10183 }, { "epoch": 0.7850755473327167, "grad_norm": 4.215100288391113, "learning_rate": 1.1633655387201998e-06, "loss": 0.8627, "step": 10184 }, { "epoch": 0.7851526364477336, "grad_norm": 3.8449103832244873, "learning_rate": 1.162565083859497e-06, "loss": 0.9384, "step": 10185 }, { "epoch": 0.7852297255627505, "grad_norm": 3.831585645675659, "learning_rate": 1.1617648682429882e-06, "loss": 0.8777, "step": 10186 }, { "epoch": 0.7853068146777675, "grad_norm": 3.922975540161133, "learning_rate": 1.1609648919205596e-06, "loss": 0.9134, "step": 10187 }, { "epoch": 0.7853839037927844, "grad_norm": 3.6187238693237305, "learning_rate": 1.1601651549420873e-06, "loss": 0.9355, "step": 10188 }, { "epoch": 0.7854609929078015, "grad_norm": 3.704779624938965, "learning_rate": 1.1593656573574302e-06, "loss": 0.9219, "step": 10189 }, { "epoch": 0.7855380820228184, "grad_norm": 4.0944719314575195, "learning_rate": 1.1585663992164336e-06, "loss": 0.9159, "step": 10190 }, { "epoch": 0.7856151711378353, "grad_norm": 3.6618051528930664, "learning_rate": 1.1577673805689266e-06, "loss": 0.8657, "step": 10191 }, { "epoch": 0.7856922602528523, "grad_norm": 3.9331610202789307, "learning_rate": 1.1569686014647253e-06, "loss": 0.9718, "step": 10192 }, { "epoch": 0.7857693493678692, "grad_norm": 3.8769805431365967, "learning_rate": 1.156170061953627e-06, "loss": 0.846, "step": 10193 }, { "epoch": 0.7858464384828863, "grad_norm": 3.4525961875915527, "learning_rate": 1.1553717620854176e-06, "loss": 1.0036, "step": 10194 }, { "epoch": 0.7859235275979032, "grad_norm": 3.376194715499878, "learning_rate": 1.1545737019098668e-06, "loss": 0.8905, "step": 10195 }, { "epoch": 0.7860006167129201, "grad_norm": 3.572995185852051, "learning_rate": 1.1537758814767298e-06, "loss": 0.8374, "step": 10196 }, { "epoch": 0.7860777058279371, "grad_norm": 3.6022350788116455, "learning_rate": 1.1529783008357476e-06, "loss": 0.9317, "step": 10197 }, { "epoch": 0.786154794942954, "grad_norm": 3.5814099311828613, "learning_rate": 1.152180960036643e-06, "loss": 
0.8554, "step": 10198 }, { "epoch": 0.7862318840579711, "grad_norm": 3.6974618434906006, "learning_rate": 1.151383859129127e-06, "loss": 0.7929, "step": 10199 }, { "epoch": 0.786308973172988, "grad_norm": 3.654067039489746, "learning_rate": 1.1505869981628953e-06, "loss": 0.8319, "step": 10200 }, { "epoch": 0.7863860622880049, "grad_norm": 3.9007177352905273, "learning_rate": 1.1497903771876272e-06, "loss": 1.0742, "step": 10201 }, { "epoch": 0.7864631514030219, "grad_norm": 4.104714870452881, "learning_rate": 1.1489939962529884e-06, "loss": 0.9199, "step": 10202 }, { "epoch": 0.7865402405180388, "grad_norm": 3.8968212604522705, "learning_rate": 1.148197855408631e-06, "loss": 0.9078, "step": 10203 }, { "epoch": 0.7866173296330559, "grad_norm": 4.120811939239502, "learning_rate": 1.1474019547041848e-06, "loss": 0.9516, "step": 10204 }, { "epoch": 0.7866944187480728, "grad_norm": 3.669375419616699, "learning_rate": 1.1466062941892754e-06, "loss": 0.9964, "step": 10205 }, { "epoch": 0.7867715078630897, "grad_norm": 3.683593988418579, "learning_rate": 1.145810873913506e-06, "loss": 0.8877, "step": 10206 }, { "epoch": 0.7868485969781067, "grad_norm": 3.3355050086975098, "learning_rate": 1.1450156939264689e-06, "loss": 0.8196, "step": 10207 }, { "epoch": 0.7869256860931236, "grad_norm": 3.5134360790252686, "learning_rate": 1.144220754277736e-06, "loss": 0.9914, "step": 10208 }, { "epoch": 0.7870027752081407, "grad_norm": 4.333958625793457, "learning_rate": 1.1434260550168702e-06, "loss": 1.0483, "step": 10209 }, { "epoch": 0.7870798643231576, "grad_norm": 3.63576340675354, "learning_rate": 1.1426315961934159e-06, "loss": 0.848, "step": 10210 }, { "epoch": 0.7871569534381745, "grad_norm": 3.60412859916687, "learning_rate": 1.1418373778569036e-06, "loss": 0.8695, "step": 10211 }, { "epoch": 0.7872340425531915, "grad_norm": 3.5103275775909424, "learning_rate": 1.1410434000568487e-06, "loss": 0.932, "step": 10212 }, { "epoch": 0.7873111316682084, "grad_norm": 4.256019115447998, "learning_rate": 1.1402496628427534e-06, "loss": 0.9959, "step": 10213 }, { "epoch": 0.7873882207832255, "grad_norm": 3.469325304031372, "learning_rate": 1.1394561662641e-06, "loss": 0.8449, "step": 10214 }, { "epoch": 0.7874653098982424, "grad_norm": 3.975343942642212, "learning_rate": 1.1386629103703606e-06, "loss": 0.9372, "step": 10215 }, { "epoch": 0.7875423990132593, "grad_norm": 3.5165517330169678, "learning_rate": 1.13786989521099e-06, "loss": 0.9793, "step": 10216 }, { "epoch": 0.7876194881282763, "grad_norm": 3.727750062942505, "learning_rate": 1.1370771208354291e-06, "loss": 0.9962, "step": 10217 }, { "epoch": 0.7876965772432932, "grad_norm": 3.6379904747009277, "learning_rate": 1.1362845872931044e-06, "loss": 0.8594, "step": 10218 }, { "epoch": 0.7877736663583103, "grad_norm": 3.5502896308898926, "learning_rate": 1.1354922946334241e-06, "loss": 0.8649, "step": 10219 }, { "epoch": 0.7878507554733272, "grad_norm": 3.6985366344451904, "learning_rate": 1.1347002429057835e-06, "loss": 0.9357, "step": 10220 }, { "epoch": 0.7879278445883441, "grad_norm": 3.5729541778564453, "learning_rate": 1.1339084321595644e-06, "loss": 0.892, "step": 10221 }, { "epoch": 0.7880049337033611, "grad_norm": 3.6966254711151123, "learning_rate": 1.133116862444132e-06, "loss": 0.9705, "step": 10222 }, { "epoch": 0.788082022818378, "grad_norm": 3.8931562900543213, "learning_rate": 1.1323255338088368e-06, "loss": 0.8877, "step": 10223 }, { "epoch": 0.788159111933395, "grad_norm": 3.7230522632598877, "learning_rate": 1.131534446303012e-06, 
"loss": 0.9154, "step": 10224 }, { "epoch": 0.788236201048412, "grad_norm": 3.777541399002075, "learning_rate": 1.1307435999759796e-06, "loss": 0.8524, "step": 10225 }, { "epoch": 0.7883132901634289, "grad_norm": 3.533332586288452, "learning_rate": 1.1299529948770443e-06, "loss": 0.9151, "step": 10226 }, { "epoch": 0.7883903792784459, "grad_norm": 3.7804453372955322, "learning_rate": 1.129162631055496e-06, "loss": 0.8325, "step": 10227 }, { "epoch": 0.7884674683934628, "grad_norm": 3.4261248111724854, "learning_rate": 1.1283725085606101e-06, "loss": 0.8707, "step": 10228 }, { "epoch": 0.7885445575084798, "grad_norm": 3.9265434741973877, "learning_rate": 1.1275826274416485e-06, "loss": 0.9651, "step": 10229 }, { "epoch": 0.7886216466234968, "grad_norm": 3.6662166118621826, "learning_rate": 1.1267929877478522e-06, "loss": 0.9031, "step": 10230 }, { "epoch": 0.7886987357385137, "grad_norm": 3.6205952167510986, "learning_rate": 1.1260035895284538e-06, "loss": 0.935, "step": 10231 }, { "epoch": 0.7887758248535307, "grad_norm": 3.919410467147827, "learning_rate": 1.1252144328326676e-06, "loss": 0.8459, "step": 10232 }, { "epoch": 0.7888529139685476, "grad_norm": 3.570185422897339, "learning_rate": 1.124425517709693e-06, "loss": 0.846, "step": 10233 }, { "epoch": 0.7889300030835646, "grad_norm": 3.781740427017212, "learning_rate": 1.123636844208717e-06, "loss": 0.8971, "step": 10234 }, { "epoch": 0.7890070921985816, "grad_norm": 3.7370405197143555, "learning_rate": 1.1228484123789064e-06, "loss": 0.958, "step": 10235 }, { "epoch": 0.7890841813135985, "grad_norm": 3.855154037475586, "learning_rate": 1.1220602222694166e-06, "loss": 0.9874, "step": 10236 }, { "epoch": 0.7891612704286155, "grad_norm": 3.9142956733703613, "learning_rate": 1.1212722739293875e-06, "loss": 0.903, "step": 10237 }, { "epoch": 0.7892383595436324, "grad_norm": 4.107082366943359, "learning_rate": 1.120484567407944e-06, "loss": 1.0201, "step": 10238 }, { "epoch": 0.7893154486586494, "grad_norm": 3.537972927093506, "learning_rate": 1.1196971027541953e-06, "loss": 0.8097, "step": 10239 }, { "epoch": 0.7893925377736664, "grad_norm": 3.690303087234497, "learning_rate": 1.1189098800172365e-06, "loss": 0.9472, "step": 10240 }, { "epoch": 0.7894696268886833, "grad_norm": 4.062992572784424, "learning_rate": 1.1181228992461451e-06, "loss": 0.8588, "step": 10241 }, { "epoch": 0.7895467160037003, "grad_norm": 3.6081042289733887, "learning_rate": 1.1173361604899857e-06, "loss": 0.8476, "step": 10242 }, { "epoch": 0.7896238051187172, "grad_norm": 3.469123601913452, "learning_rate": 1.1165496637978086e-06, "loss": 0.8059, "step": 10243 }, { "epoch": 0.7897008942337342, "grad_norm": 3.6479201316833496, "learning_rate": 1.1157634092186464e-06, "loss": 0.8624, "step": 10244 }, { "epoch": 0.7897779833487512, "grad_norm": 3.684077024459839, "learning_rate": 1.1149773968015205e-06, "loss": 0.9347, "step": 10245 }, { "epoch": 0.7898550724637681, "grad_norm": 4.0022382736206055, "learning_rate": 1.114191626595431e-06, "loss": 0.9736, "step": 10246 }, { "epoch": 0.7899321615787851, "grad_norm": 3.8564565181732178, "learning_rate": 1.1134060986493688e-06, "loss": 0.834, "step": 10247 }, { "epoch": 0.790009250693802, "grad_norm": 3.95080828666687, "learning_rate": 1.1126208130123056e-06, "loss": 0.8655, "step": 10248 }, { "epoch": 0.790086339808819, "grad_norm": 3.4541101455688477, "learning_rate": 1.1118357697332027e-06, "loss": 0.8184, "step": 10249 }, { "epoch": 0.790163428923836, "grad_norm": 3.7209436893463135, "learning_rate": 
1.1110509688610038e-06, "loss": 0.8719, "step": 10250 }, { "epoch": 0.7902405180388529, "grad_norm": 3.6410865783691406, "learning_rate": 1.1102664104446342e-06, "loss": 0.9483, "step": 10251 }, { "epoch": 0.7903176071538699, "grad_norm": 3.3770508766174316, "learning_rate": 1.1094820945330088e-06, "loss": 0.7337, "step": 10252 }, { "epoch": 0.7903946962688868, "grad_norm": 3.8358688354492188, "learning_rate": 1.1086980211750247e-06, "loss": 0.8971, "step": 10253 }, { "epoch": 0.7904717853839038, "grad_norm": 3.432882308959961, "learning_rate": 1.1079141904195662e-06, "loss": 0.8461, "step": 10254 }, { "epoch": 0.7905488744989208, "grad_norm": 4.271652698516846, "learning_rate": 1.107130602315501e-06, "loss": 1.0068, "step": 10255 }, { "epoch": 0.7906259636139377, "grad_norm": 3.7254769802093506, "learning_rate": 1.1063472569116802e-06, "loss": 0.8175, "step": 10256 }, { "epoch": 0.7907030527289547, "grad_norm": 3.4626779556274414, "learning_rate": 1.1055641542569418e-06, "loss": 0.7748, "step": 10257 }, { "epoch": 0.7907801418439716, "grad_norm": 4.178032398223877, "learning_rate": 1.1047812944001084e-06, "loss": 0.9467, "step": 10258 }, { "epoch": 0.7908572309589886, "grad_norm": 3.711949348449707, "learning_rate": 1.103998677389988e-06, "loss": 0.9139, "step": 10259 }, { "epoch": 0.7909343200740055, "grad_norm": 4.120387554168701, "learning_rate": 1.1032163032753717e-06, "loss": 0.9809, "step": 10260 }, { "epoch": 0.7910114091890225, "grad_norm": 3.4602792263031006, "learning_rate": 1.1024341721050385e-06, "loss": 0.9068, "step": 10261 }, { "epoch": 0.7910884983040395, "grad_norm": 3.6511247158050537, "learning_rate": 1.1016522839277471e-06, "loss": 0.8453, "step": 10262 }, { "epoch": 0.7911655874190564, "grad_norm": 4.048055648803711, "learning_rate": 1.1008706387922457e-06, "loss": 1.0161, "step": 10263 }, { "epoch": 0.7912426765340734, "grad_norm": 3.4524850845336914, "learning_rate": 1.100089236747266e-06, "loss": 0.8793, "step": 10264 }, { "epoch": 0.7913197656490903, "grad_norm": 3.5564897060394287, "learning_rate": 1.0993080778415245e-06, "loss": 0.8971, "step": 10265 }, { "epoch": 0.7913968547641073, "grad_norm": 4.036877155303955, "learning_rate": 1.0985271621237231e-06, "loss": 0.91, "step": 10266 }, { "epoch": 0.7914739438791243, "grad_norm": 3.743246078491211, "learning_rate": 1.0977464896425461e-06, "loss": 0.9205, "step": 10267 }, { "epoch": 0.7915510329941412, "grad_norm": 3.4432597160339355, "learning_rate": 1.0969660604466648e-06, "loss": 0.8664, "step": 10268 }, { "epoch": 0.7916281221091582, "grad_norm": 3.663029670715332, "learning_rate": 1.0961858745847348e-06, "loss": 0.9574, "step": 10269 }, { "epoch": 0.7917052112241751, "grad_norm": 3.6875574588775635, "learning_rate": 1.0954059321053978e-06, "loss": 0.859, "step": 10270 }, { "epoch": 0.7917823003391921, "grad_norm": 3.6623551845550537, "learning_rate": 1.0946262330572798e-06, "loss": 0.8892, "step": 10271 }, { "epoch": 0.7918593894542091, "grad_norm": 3.574294328689575, "learning_rate": 1.0938467774889883e-06, "loss": 0.9423, "step": 10272 }, { "epoch": 0.791936478569226, "grad_norm": 3.773029088973999, "learning_rate": 1.0930675654491197e-06, "loss": 0.8767, "step": 10273 }, { "epoch": 0.792013567684243, "grad_norm": 3.5403318405151367, "learning_rate": 1.092288596986254e-06, "loss": 0.962, "step": 10274 }, { "epoch": 0.7920906567992599, "grad_norm": 3.787517786026001, "learning_rate": 1.0915098721489553e-06, "loss": 0.9419, "step": 10275 }, { "epoch": 0.7921677459142769, "grad_norm": 3.657186508178711, 
"learning_rate": 1.0907313909857737e-06, "loss": 0.9917, "step": 10276 }, { "epoch": 0.7922448350292939, "grad_norm": 3.8778676986694336, "learning_rate": 1.0899531535452452e-06, "loss": 0.8329, "step": 10277 }, { "epoch": 0.7923219241443108, "grad_norm": 4.028141975402832, "learning_rate": 1.0891751598758849e-06, "loss": 0.9051, "step": 10278 }, { "epoch": 0.7923990132593278, "grad_norm": 3.770726442337036, "learning_rate": 1.0883974100261985e-06, "loss": 0.8382, "step": 10279 }, { "epoch": 0.7924761023743447, "grad_norm": 3.7787578105926514, "learning_rate": 1.0876199040446754e-06, "loss": 0.8499, "step": 10280 }, { "epoch": 0.7925531914893617, "grad_norm": 3.5443973541259766, "learning_rate": 1.0868426419797883e-06, "loss": 0.9412, "step": 10281 }, { "epoch": 0.7926302806043787, "grad_norm": 3.392566680908203, "learning_rate": 1.0860656238799971e-06, "loss": 0.8903, "step": 10282 }, { "epoch": 0.7927073697193956, "grad_norm": 3.8018105030059814, "learning_rate": 1.0852888497937424e-06, "loss": 0.8888, "step": 10283 }, { "epoch": 0.7927844588344126, "grad_norm": 3.710118293762207, "learning_rate": 1.0845123197694528e-06, "loss": 1.0176, "step": 10284 }, { "epoch": 0.7928615479494295, "grad_norm": 3.820878028869629, "learning_rate": 1.0837360338555414e-06, "loss": 0.9367, "step": 10285 }, { "epoch": 0.7929386370644464, "grad_norm": 4.015862941741943, "learning_rate": 1.0829599921004054e-06, "loss": 0.8729, "step": 10286 }, { "epoch": 0.7930157261794635, "grad_norm": 3.3819711208343506, "learning_rate": 1.0821841945524265e-06, "loss": 0.8165, "step": 10287 }, { "epoch": 0.7930928152944804, "grad_norm": 4.338066101074219, "learning_rate": 1.081408641259974e-06, "loss": 1.0214, "step": 10288 }, { "epoch": 0.7931699044094974, "grad_norm": 3.5678458213806152, "learning_rate": 1.0806333322713964e-06, "loss": 0.8869, "step": 10289 }, { "epoch": 0.7932469935245143, "grad_norm": 4.043673515319824, "learning_rate": 1.0798582676350316e-06, "loss": 0.9494, "step": 10290 }, { "epoch": 0.7933240826395312, "grad_norm": 3.7308008670806885, "learning_rate": 1.0790834473992013e-06, "loss": 0.7882, "step": 10291 }, { "epoch": 0.7934011717545483, "grad_norm": 4.306490421295166, "learning_rate": 1.0783088716122102e-06, "loss": 0.995, "step": 10292 }, { "epoch": 0.7934782608695652, "grad_norm": 3.6858410835266113, "learning_rate": 1.0775345403223509e-06, "loss": 0.8796, "step": 10293 }, { "epoch": 0.7935553499845822, "grad_norm": 3.7026453018188477, "learning_rate": 1.0767604535778976e-06, "loss": 0.9955, "step": 10294 }, { "epoch": 0.7936324390995991, "grad_norm": 3.596752882003784, "learning_rate": 1.075986611427111e-06, "loss": 0.8235, "step": 10295 }, { "epoch": 0.793709528214616, "grad_norm": 3.608722448348999, "learning_rate": 1.0752130139182364e-06, "loss": 0.8598, "step": 10296 }, { "epoch": 0.7937866173296331, "grad_norm": 3.686950922012329, "learning_rate": 1.0744396610995033e-06, "loss": 0.8529, "step": 10297 }, { "epoch": 0.79386370644465, "grad_norm": 4.59524393081665, "learning_rate": 1.0736665530191276e-06, "loss": 1.1376, "step": 10298 }, { "epoch": 0.793940795559667, "grad_norm": 3.7717888355255127, "learning_rate": 1.072893689725306e-06, "loss": 0.8147, "step": 10299 }, { "epoch": 0.7940178846746839, "grad_norm": 3.5233025550842285, "learning_rate": 1.072121071266224e-06, "loss": 0.8799, "step": 10300 }, { "epoch": 0.7940949737897008, "grad_norm": 3.7410104274749756, "learning_rate": 1.07134869769005e-06, "loss": 0.9286, "step": 10301 }, { "epoch": 0.7941720629047179, "grad_norm": 
3.774850606918335, "learning_rate": 1.0705765690449376e-06, "loss": 0.9598, "step": 10302 }, { "epoch": 0.7942491520197348, "grad_norm": 4.010867595672607, "learning_rate": 1.0698046853790268e-06, "loss": 0.9681, "step": 10303 }, { "epoch": 0.7943262411347518, "grad_norm": 3.8371970653533936, "learning_rate": 1.0690330467404375e-06, "loss": 0.9313, "step": 10304 }, { "epoch": 0.7944033302497687, "grad_norm": 3.689708709716797, "learning_rate": 1.0682616531772782e-06, "loss": 0.9279, "step": 10305 }, { "epoch": 0.7944804193647856, "grad_norm": 3.87109112739563, "learning_rate": 1.0674905047376423e-06, "loss": 0.8764, "step": 10306 }, { "epoch": 0.7945575084798027, "grad_norm": 3.5622525215148926, "learning_rate": 1.0667196014696062e-06, "loss": 0.9274, "step": 10307 }, { "epoch": 0.7946345975948196, "grad_norm": 3.48157000541687, "learning_rate": 1.0659489434212323e-06, "loss": 0.8604, "step": 10308 }, { "epoch": 0.7947116867098366, "grad_norm": 3.320413112640381, "learning_rate": 1.0651785306405683e-06, "loss": 0.803, "step": 10309 }, { "epoch": 0.7947887758248535, "grad_norm": 3.5544357299804688, "learning_rate": 1.064408363175642e-06, "loss": 0.9412, "step": 10310 }, { "epoch": 0.7948658649398704, "grad_norm": 3.2090461254119873, "learning_rate": 1.0636384410744716e-06, "loss": 0.8046, "step": 10311 }, { "epoch": 0.7949429540548875, "grad_norm": 3.8545830249786377, "learning_rate": 1.0628687643850572e-06, "loss": 0.9692, "step": 10312 }, { "epoch": 0.7950200431699044, "grad_norm": 3.9732425212860107, "learning_rate": 1.0620993331553848e-06, "loss": 0.8421, "step": 10313 }, { "epoch": 0.7950971322849214, "grad_norm": 3.6110129356384277, "learning_rate": 1.0613301474334254e-06, "loss": 0.943, "step": 10314 }, { "epoch": 0.7951742213999383, "grad_norm": 3.558922529220581, "learning_rate": 1.0605612072671305e-06, "loss": 0.8885, "step": 10315 }, { "epoch": 0.7952513105149552, "grad_norm": 3.5651447772979736, "learning_rate": 1.0597925127044423e-06, "loss": 0.8734, "step": 10316 }, { "epoch": 0.7953283996299723, "grad_norm": 3.470515251159668, "learning_rate": 1.0590240637932835e-06, "loss": 0.7586, "step": 10317 }, { "epoch": 0.7954054887449892, "grad_norm": 3.696683883666992, "learning_rate": 1.0582558605815636e-06, "loss": 0.8775, "step": 10318 }, { "epoch": 0.7954825778600062, "grad_norm": 3.392326593399048, "learning_rate": 1.0574879031171776e-06, "loss": 0.7721, "step": 10319 }, { "epoch": 0.7955596669750231, "grad_norm": 4.037016868591309, "learning_rate": 1.0567201914480002e-06, "loss": 0.8883, "step": 10320 }, { "epoch": 0.79563675609004, "grad_norm": 3.5130562782287598, "learning_rate": 1.0559527256218959e-06, "loss": 0.861, "step": 10321 }, { "epoch": 0.7957138452050571, "grad_norm": 3.640817165374756, "learning_rate": 1.0551855056867132e-06, "loss": 0.9281, "step": 10322 }, { "epoch": 0.795790934320074, "grad_norm": 4.14249324798584, "learning_rate": 1.0544185316902834e-06, "loss": 0.9328, "step": 10323 }, { "epoch": 0.795868023435091, "grad_norm": 3.433025598526001, "learning_rate": 1.0536518036804228e-06, "loss": 0.8419, "step": 10324 }, { "epoch": 0.7959451125501079, "grad_norm": 3.647221565246582, "learning_rate": 1.0528853217049357e-06, "loss": 0.9425, "step": 10325 }, { "epoch": 0.7960222016651248, "grad_norm": 3.8280515670776367, "learning_rate": 1.0521190858116042e-06, "loss": 0.9141, "step": 10326 }, { "epoch": 0.7960992907801419, "grad_norm": 3.922826051712036, "learning_rate": 1.0513530960482016e-06, "loss": 1.0064, "step": 10327 }, { "epoch": 0.7961763798951588, 
"grad_norm": 3.8651580810546875, "learning_rate": 1.0505873524624821e-06, "loss": 1.0625, "step": 10328 }, { "epoch": 0.7962534690101758, "grad_norm": 3.5953264236450195, "learning_rate": 1.0498218551021876e-06, "loss": 0.8368, "step": 10329 }, { "epoch": 0.7963305581251927, "grad_norm": 3.8444368839263916, "learning_rate": 1.0490566040150428e-06, "loss": 0.9272, "step": 10330 }, { "epoch": 0.7964076472402097, "grad_norm": 3.7892086505889893, "learning_rate": 1.0482915992487546e-06, "loss": 0.8233, "step": 10331 }, { "epoch": 0.7964847363552267, "grad_norm": 3.583220958709717, "learning_rate": 1.0475268408510192e-06, "loss": 1.0282, "step": 10332 }, { "epoch": 0.7965618254702436, "grad_norm": 3.683847427368164, "learning_rate": 1.046762328869515e-06, "loss": 0.8625, "step": 10333 }, { "epoch": 0.7966389145852606, "grad_norm": 3.9110031127929688, "learning_rate": 1.0459980633519052e-06, "loss": 0.9429, "step": 10334 }, { "epoch": 0.7967160037002775, "grad_norm": 3.5115268230438232, "learning_rate": 1.0452340443458376e-06, "loss": 0.8812, "step": 10335 }, { "epoch": 0.7967930928152945, "grad_norm": 3.59769868850708, "learning_rate": 1.0444702718989452e-06, "loss": 0.8564, "step": 10336 }, { "epoch": 0.7968701819303114, "grad_norm": 3.9897191524505615, "learning_rate": 1.043706746058845e-06, "loss": 0.8961, "step": 10337 }, { "epoch": 0.7969472710453284, "grad_norm": 3.6812877655029297, "learning_rate": 1.0429434668731393e-06, "loss": 1.0195, "step": 10338 }, { "epoch": 0.7970243601603454, "grad_norm": 4.031727313995361, "learning_rate": 1.0421804343894142e-06, "loss": 0.9039, "step": 10339 }, { "epoch": 0.7971014492753623, "grad_norm": 4.048935413360596, "learning_rate": 1.0414176486552424e-06, "loss": 0.8887, "step": 10340 }, { "epoch": 0.7971785383903793, "grad_norm": 3.7354636192321777, "learning_rate": 1.0406551097181765e-06, "loss": 0.9092, "step": 10341 }, { "epoch": 0.7972556275053962, "grad_norm": 3.63582706451416, "learning_rate": 1.0398928176257588e-06, "loss": 0.9253, "step": 10342 }, { "epoch": 0.7973327166204132, "grad_norm": 3.9569201469421387, "learning_rate": 1.039130772425514e-06, "loss": 1.0315, "step": 10343 }, { "epoch": 0.7974098057354302, "grad_norm": 3.3830461502075195, "learning_rate": 1.0383689741649516e-06, "loss": 0.8325, "step": 10344 }, { "epoch": 0.7974868948504471, "grad_norm": 4.122878074645996, "learning_rate": 1.0376074228915662e-06, "loss": 1.0077, "step": 10345 }, { "epoch": 0.7975639839654641, "grad_norm": 3.5504045486450195, "learning_rate": 1.0368461186528366e-06, "loss": 0.9757, "step": 10346 }, { "epoch": 0.797641073080481, "grad_norm": 3.6346943378448486, "learning_rate": 1.0360850614962254e-06, "loss": 0.7473, "step": 10347 }, { "epoch": 0.797718162195498, "grad_norm": 3.489760637283325, "learning_rate": 1.0353242514691807e-06, "loss": 0.8558, "step": 10348 }, { "epoch": 0.797795251310515, "grad_norm": 4.047338485717773, "learning_rate": 1.0345636886191351e-06, "loss": 0.9747, "step": 10349 }, { "epoch": 0.7978723404255319, "grad_norm": 3.327815055847168, "learning_rate": 1.0338033729935064e-06, "loss": 0.9214, "step": 10350 }, { "epoch": 0.7979494295405489, "grad_norm": 3.7090442180633545, "learning_rate": 1.0330433046396971e-06, "loss": 0.9179, "step": 10351 }, { "epoch": 0.7980265186555658, "grad_norm": 3.697265863418579, "learning_rate": 1.032283483605091e-06, "loss": 0.8827, "step": 10352 }, { "epoch": 0.7981036077705828, "grad_norm": 3.3949902057647705, "learning_rate": 1.03152390993706e-06, "loss": 0.9044, "step": 10353 }, { "epoch": 
0.7981806968855998, "grad_norm": 3.660855531692505, "learning_rate": 1.0307645836829604e-06, "loss": 0.9089, "step": 10354 }, { "epoch": 0.7982577860006167, "grad_norm": 3.7583930492401123, "learning_rate": 1.030005504890132e-06, "loss": 0.9184, "step": 10355 }, { "epoch": 0.7983348751156337, "grad_norm": 4.092621803283691, "learning_rate": 1.0292466736058988e-06, "loss": 0.919, "step": 10356 }, { "epoch": 0.7984119642306506, "grad_norm": 3.7729806900024414, "learning_rate": 1.0284880898775717e-06, "loss": 0.8264, "step": 10357 }, { "epoch": 0.7984890533456676, "grad_norm": 3.7292611598968506, "learning_rate": 1.0277297537524422e-06, "loss": 0.7999, "step": 10358 }, { "epoch": 0.7985661424606846, "grad_norm": 3.5362532138824463, "learning_rate": 1.0269716652777894e-06, "loss": 0.9376, "step": 10359 }, { "epoch": 0.7986432315757015, "grad_norm": 3.8130404949188232, "learning_rate": 1.0262138245008768e-06, "loss": 0.9412, "step": 10360 }, { "epoch": 0.7987203206907185, "grad_norm": 3.703113555908203, "learning_rate": 1.0254562314689508e-06, "loss": 0.9883, "step": 10361 }, { "epoch": 0.7987974098057354, "grad_norm": 3.831934928894043, "learning_rate": 1.0246988862292462e-06, "loss": 0.8621, "step": 10362 }, { "epoch": 0.7988744989207524, "grad_norm": 3.592346668243408, "learning_rate": 1.023941788828976e-06, "loss": 0.9581, "step": 10363 }, { "epoch": 0.7989515880357694, "grad_norm": 3.945913553237915, "learning_rate": 1.023184939315342e-06, "loss": 0.9714, "step": 10364 }, { "epoch": 0.7990286771507863, "grad_norm": 3.960003614425659, "learning_rate": 1.0224283377355316e-06, "loss": 0.9178, "step": 10365 }, { "epoch": 0.7991057662658033, "grad_norm": 3.8358631134033203, "learning_rate": 1.021671984136713e-06, "loss": 0.9071, "step": 10366 }, { "epoch": 0.7991828553808202, "grad_norm": 3.6046762466430664, "learning_rate": 1.020915878566044e-06, "loss": 0.8723, "step": 10367 }, { "epoch": 0.7992599444958371, "grad_norm": 3.6357581615448, "learning_rate": 1.0201600210706596e-06, "loss": 0.8464, "step": 10368 }, { "epoch": 0.7993370336108542, "grad_norm": 3.683241605758667, "learning_rate": 1.0194044116976864e-06, "loss": 0.9561, "step": 10369 }, { "epoch": 0.7994141227258711, "grad_norm": 4.788916110992432, "learning_rate": 1.018649050494232e-06, "loss": 1.021, "step": 10370 }, { "epoch": 0.7994912118408881, "grad_norm": 4.036243438720703, "learning_rate": 1.0178939375073892e-06, "loss": 1.0098, "step": 10371 }, { "epoch": 0.799568300955905, "grad_norm": 3.611304998397827, "learning_rate": 1.0171390727842357e-06, "loss": 0.8585, "step": 10372 }, { "epoch": 0.799645390070922, "grad_norm": 3.9461710453033447, "learning_rate": 1.0163844563718344e-06, "loss": 0.9371, "step": 10373 }, { "epoch": 0.799722479185939, "grad_norm": 3.3774936199188232, "learning_rate": 1.0156300883172292e-06, "loss": 0.8, "step": 10374 }, { "epoch": 0.7997995683009559, "grad_norm": 3.748626947402954, "learning_rate": 1.0148759686674532e-06, "loss": 0.8262, "step": 10375 }, { "epoch": 0.7998766574159729, "grad_norm": 3.4377031326293945, "learning_rate": 1.0141220974695199e-06, "loss": 0.794, "step": 10376 }, { "epoch": 0.7999537465309898, "grad_norm": 3.6376664638519287, "learning_rate": 1.0133684747704314e-06, "loss": 0.8805, "step": 10377 }, { "epoch": 0.8000308356460067, "grad_norm": 3.6229360103607178, "learning_rate": 1.012615100617172e-06, "loss": 0.8017, "step": 10378 }, { "epoch": 0.8001079247610238, "grad_norm": 3.5345287322998047, "learning_rate": 1.0118619750567082e-06, "loss": 0.8068, "step": 10379 }, { 
"epoch": 0.8001850138760407, "grad_norm": 3.6617918014526367, "learning_rate": 1.0111090981359961e-06, "loss": 1.0157, "step": 10380 }, { "epoch": 0.8002621029910577, "grad_norm": 3.9429585933685303, "learning_rate": 1.0103564699019707e-06, "loss": 0.9599, "step": 10381 }, { "epoch": 0.8003391921060746, "grad_norm": 3.5763978958129883, "learning_rate": 1.009604090401558e-06, "loss": 0.9006, "step": 10382 }, { "epoch": 0.8004162812210915, "grad_norm": 3.58232045173645, "learning_rate": 1.008851959681665e-06, "loss": 0.8655, "step": 10383 }, { "epoch": 0.8004933703361086, "grad_norm": 3.727440118789673, "learning_rate": 1.0081000777891803e-06, "loss": 0.9123, "step": 10384 }, { "epoch": 0.8005704594511255, "grad_norm": 3.6381850242614746, "learning_rate": 1.0073484447709809e-06, "loss": 0.8735, "step": 10385 }, { "epoch": 0.8006475485661425, "grad_norm": 3.738128662109375, "learning_rate": 1.0065970606739273e-06, "loss": 0.8284, "step": 10386 }, { "epoch": 0.8007246376811594, "grad_norm": 3.4860498905181885, "learning_rate": 1.005845925544865e-06, "loss": 0.8732, "step": 10387 }, { "epoch": 0.8008017267961763, "grad_norm": 3.461859703063965, "learning_rate": 1.0050950394306242e-06, "loss": 0.9177, "step": 10388 }, { "epoch": 0.8008788159111934, "grad_norm": 3.968186855316162, "learning_rate": 1.0043444023780164e-06, "loss": 0.9015, "step": 10389 }, { "epoch": 0.8009559050262103, "grad_norm": 3.562736749649048, "learning_rate": 1.0035940144338406e-06, "loss": 0.7696, "step": 10390 }, { "epoch": 0.8010329941412273, "grad_norm": 4.058101654052734, "learning_rate": 1.0028438756448805e-06, "loss": 0.9197, "step": 10391 }, { "epoch": 0.8011100832562442, "grad_norm": 4.087806224822998, "learning_rate": 1.0020939860579033e-06, "loss": 0.9222, "step": 10392 }, { "epoch": 0.8011871723712611, "grad_norm": 3.800830364227295, "learning_rate": 1.0013443457196598e-06, "loss": 1.0234, "step": 10393 }, { "epoch": 0.8012642614862782, "grad_norm": 3.52839994430542, "learning_rate": 1.0005949546768879e-06, "loss": 0.8291, "step": 10394 }, { "epoch": 0.8013413506012951, "grad_norm": 3.8631598949432373, "learning_rate": 9.998458129763062e-07, "loss": 0.8932, "step": 10395 }, { "epoch": 0.8014184397163121, "grad_norm": 3.8592798709869385, "learning_rate": 9.990969206646205e-07, "loss": 1.0186, "step": 10396 }, { "epoch": 0.801495528831329, "grad_norm": 3.7347230911254883, "learning_rate": 9.983482777885211e-07, "loss": 0.9141, "step": 10397 }, { "epoch": 0.8015726179463459, "grad_norm": 3.569770574569702, "learning_rate": 9.975998843946811e-07, "loss": 0.8909, "step": 10398 }, { "epoch": 0.801649707061363, "grad_norm": 3.699913740158081, "learning_rate": 9.968517405297607e-07, "loss": 0.916, "step": 10399 }, { "epoch": 0.8017267961763799, "grad_norm": 3.733384609222412, "learning_rate": 9.961038462403999e-07, "loss": 0.8737, "step": 10400 }, { "epoch": 0.8018038852913969, "grad_norm": 3.7092156410217285, "learning_rate": 9.953562015732281e-07, "loss": 0.9746, "step": 10401 }, { "epoch": 0.8018809744064138, "grad_norm": 3.567920446395874, "learning_rate": 9.94608806574856e-07, "loss": 0.8904, "step": 10402 }, { "epoch": 0.8019580635214307, "grad_norm": 3.6356654167175293, "learning_rate": 9.93861661291881e-07, "loss": 0.9383, "step": 10403 }, { "epoch": 0.8020351526364478, "grad_norm": 3.344292640686035, "learning_rate": 9.931147657708823e-07, "loss": 0.7388, "step": 10404 }, { "epoch": 0.8021122417514647, "grad_norm": 3.598586082458496, "learning_rate": 9.92368120058428e-07, "loss": 0.7381, "step": 10405 }, { 
"epoch": 0.8021893308664817, "grad_norm": 3.6138226985931396, "learning_rate": 9.916217242010634e-07, "loss": 0.8098, "step": 10406 }, { "epoch": 0.8022664199814986, "grad_norm": 3.5014567375183105, "learning_rate": 9.908755782453245e-07, "loss": 0.8243, "step": 10407 }, { "epoch": 0.8023435090965155, "grad_norm": 3.7613766193389893, "learning_rate": 9.901296822377293e-07, "loss": 0.8441, "step": 10408 }, { "epoch": 0.8024205982115326, "grad_norm": 3.4418587684631348, "learning_rate": 9.893840362247809e-07, "loss": 0.8912, "step": 10409 }, { "epoch": 0.8024976873265495, "grad_norm": 3.7355403900146484, "learning_rate": 9.88638640252968e-07, "loss": 0.9684, "step": 10410 }, { "epoch": 0.8025747764415665, "grad_norm": 3.9282965660095215, "learning_rate": 9.87893494368759e-07, "loss": 0.9324, "step": 10411 }, { "epoch": 0.8026518655565834, "grad_norm": 3.668252944946289, "learning_rate": 9.871485986186114e-07, "loss": 0.9229, "step": 10412 }, { "epoch": 0.8027289546716003, "grad_norm": 3.8640308380126953, "learning_rate": 9.86403953048965e-07, "loss": 0.9495, "step": 10413 }, { "epoch": 0.8028060437866174, "grad_norm": 3.8201818466186523, "learning_rate": 9.856595577062456e-07, "loss": 0.8985, "step": 10414 }, { "epoch": 0.8028831329016343, "grad_norm": 3.5459721088409424, "learning_rate": 9.849154126368638e-07, "loss": 0.9009, "step": 10415 }, { "epoch": 0.8029602220166513, "grad_norm": 3.3526997566223145, "learning_rate": 9.841715178872092e-07, "loss": 0.8461, "step": 10416 }, { "epoch": 0.8030373111316682, "grad_norm": 3.9801721572875977, "learning_rate": 9.834278735036623e-07, "loss": 0.9436, "step": 10417 }, { "epoch": 0.8031144002466851, "grad_norm": 3.7711634635925293, "learning_rate": 9.826844795325852e-07, "loss": 1.0106, "step": 10418 }, { "epoch": 0.8031914893617021, "grad_norm": 3.627129316329956, "learning_rate": 9.819413360203244e-07, "loss": 1.0238, "step": 10419 }, { "epoch": 0.8032685784767191, "grad_norm": 3.4935200214385986, "learning_rate": 9.811984430132116e-07, "loss": 0.7833, "step": 10420 }, { "epoch": 0.8033456675917361, "grad_norm": 3.4968650341033936, "learning_rate": 9.804558005575632e-07, "loss": 0.9387, "step": 10421 }, { "epoch": 0.803422756706753, "grad_norm": 3.8687143325805664, "learning_rate": 9.797134086996762e-07, "loss": 0.8498, "step": 10422 }, { "epoch": 0.8034998458217699, "grad_norm": 3.631547451019287, "learning_rate": 9.78971267485837e-07, "loss": 0.8742, "step": 10423 }, { "epoch": 0.803576934936787, "grad_norm": 3.5340750217437744, "learning_rate": 9.782293769623136e-07, "loss": 0.9018, "step": 10424 }, { "epoch": 0.8036540240518039, "grad_norm": 3.6497690677642822, "learning_rate": 9.774877371753594e-07, "loss": 0.8692, "step": 10425 }, { "epoch": 0.8037311131668209, "grad_norm": 4.077952861785889, "learning_rate": 9.767463481712113e-07, "loss": 0.867, "step": 10426 }, { "epoch": 0.8038082022818378, "grad_norm": 3.7168338298797607, "learning_rate": 9.760052099960921e-07, "loss": 0.8681, "step": 10427 }, { "epoch": 0.8038852913968547, "grad_norm": 3.6925764083862305, "learning_rate": 9.752643226962066e-07, "loss": 0.9753, "step": 10428 }, { "epoch": 0.8039623805118717, "grad_norm": 3.3511204719543457, "learning_rate": 9.745236863177465e-07, "loss": 0.8573, "step": 10429 }, { "epoch": 0.8040394696268887, "grad_norm": 3.528261661529541, "learning_rate": 9.737833009068859e-07, "loss": 0.958, "step": 10430 }, { "epoch": 0.8041165587419057, "grad_norm": 3.954437255859375, "learning_rate": 9.73043166509785e-07, "loss": 1.0196, "step": 10431 }, { 
"epoch": 0.8041936478569226, "grad_norm": 3.7125864028930664, "learning_rate": 9.723032831725859e-07, "loss": 0.9288, "step": 10432 }, { "epoch": 0.8042707369719395, "grad_norm": 3.7825093269348145, "learning_rate": 9.715636509414168e-07, "loss": 0.967, "step": 10433 }, { "epoch": 0.8043478260869565, "grad_norm": 3.930483818054199, "learning_rate": 9.708242698623898e-07, "loss": 0.8991, "step": 10434 }, { "epoch": 0.8044249152019735, "grad_norm": 3.8229269981384277, "learning_rate": 9.700851399816026e-07, "loss": 0.8454, "step": 10435 }, { "epoch": 0.8045020043169905, "grad_norm": 3.621720790863037, "learning_rate": 9.693462613451365e-07, "loss": 0.9305, "step": 10436 }, { "epoch": 0.8045790934320074, "grad_norm": 3.6314635276794434, "learning_rate": 9.686076339990546e-07, "loss": 0.8921, "step": 10437 }, { "epoch": 0.8046561825470243, "grad_norm": 3.537757635116577, "learning_rate": 9.678692579894072e-07, "loss": 0.9057, "step": 10438 }, { "epoch": 0.8047332716620413, "grad_norm": 3.6594655513763428, "learning_rate": 9.671311333622292e-07, "loss": 0.9033, "step": 10439 }, { "epoch": 0.8048103607770583, "grad_norm": 4.193519592285156, "learning_rate": 9.663932601635378e-07, "loss": 0.9157, "step": 10440 }, { "epoch": 0.8048874498920753, "grad_norm": 3.8593950271606445, "learning_rate": 9.656556384393362e-07, "loss": 0.9297, "step": 10441 }, { "epoch": 0.8049645390070922, "grad_norm": 3.904437780380249, "learning_rate": 9.649182682356122e-07, "loss": 0.8452, "step": 10442 }, { "epoch": 0.8050416281221091, "grad_norm": 4.147661209106445, "learning_rate": 9.64181149598335e-07, "loss": 0.9162, "step": 10443 }, { "epoch": 0.8051187172371261, "grad_norm": 4.427166938781738, "learning_rate": 9.634442825734609e-07, "loss": 0.919, "step": 10444 }, { "epoch": 0.805195806352143, "grad_norm": 3.7740793228149414, "learning_rate": 9.627076672069302e-07, "loss": 0.8899, "step": 10445 }, { "epoch": 0.8052728954671601, "grad_norm": 3.5739588737487793, "learning_rate": 9.619713035446666e-07, "loss": 0.9004, "step": 10446 }, { "epoch": 0.805349984582177, "grad_norm": 3.8233814239501953, "learning_rate": 9.612351916325795e-07, "loss": 0.9768, "step": 10447 }, { "epoch": 0.8054270736971939, "grad_norm": 4.079464912414551, "learning_rate": 9.604993315165607e-07, "loss": 1.0417, "step": 10448 }, { "epoch": 0.8055041628122109, "grad_norm": 3.4598426818847656, "learning_rate": 9.597637232424866e-07, "loss": 0.8658, "step": 10449 }, { "epoch": 0.8055812519272278, "grad_norm": 3.5878536701202393, "learning_rate": 9.590283668562195e-07, "loss": 0.8726, "step": 10450 }, { "epoch": 0.8056583410422449, "grad_norm": 3.5726075172424316, "learning_rate": 9.582932624036052e-07, "loss": 0.8884, "step": 10451 }, { "epoch": 0.8057354301572618, "grad_norm": 3.6233930587768555, "learning_rate": 9.575584099304735e-07, "loss": 0.8807, "step": 10452 }, { "epoch": 0.8058125192722787, "grad_norm": 3.603677272796631, "learning_rate": 9.5682380948264e-07, "loss": 0.9608, "step": 10453 }, { "epoch": 0.8058896083872957, "grad_norm": 3.6978039741516113, "learning_rate": 9.560894611059001e-07, "loss": 0.906, "step": 10454 }, { "epoch": 0.8059666975023126, "grad_norm": 3.5961194038391113, "learning_rate": 9.55355364846039e-07, "loss": 0.9432, "step": 10455 }, { "epoch": 0.8060437866173297, "grad_norm": 3.6993865966796875, "learning_rate": 9.546215207488225e-07, "loss": 0.9392, "step": 10456 }, { "epoch": 0.8061208757323466, "grad_norm": 4.084522724151611, "learning_rate": 9.538879288600028e-07, "loss": 0.8663, "step": 10457 }, { "epoch": 
0.8061979648473635, "grad_norm": 3.5728228092193604, "learning_rate": 9.531545892253169e-07, "loss": 0.9397, "step": 10458 }, { "epoch": 0.8062750539623805, "grad_norm": 3.443225383758545, "learning_rate": 9.52421501890482e-07, "loss": 0.8319, "step": 10459 }, { "epoch": 0.8063521430773974, "grad_norm": 3.599980115890503, "learning_rate": 9.516886669012032e-07, "loss": 0.9463, "step": 10460 }, { "epoch": 0.8064292321924145, "grad_norm": 3.455798387527466, "learning_rate": 9.509560843031695e-07, "loss": 0.8954, "step": 10461 }, { "epoch": 0.8065063213074314, "grad_norm": 3.5912976264953613, "learning_rate": 9.502237541420534e-07, "loss": 0.9573, "step": 10462 }, { "epoch": 0.8065834104224483, "grad_norm": 3.59824275970459, "learning_rate": 9.494916764635126e-07, "loss": 0.8395, "step": 10463 }, { "epoch": 0.8066604995374653, "grad_norm": 3.764808416366577, "learning_rate": 9.487598513131868e-07, "loss": 0.8776, "step": 10464 }, { "epoch": 0.8067375886524822, "grad_norm": 3.6447689533233643, "learning_rate": 9.480282787367029e-07, "loss": 0.8773, "step": 10465 }, { "epoch": 0.8068146777674993, "grad_norm": 3.842656373977661, "learning_rate": 9.472969587796694e-07, "loss": 0.9713, "step": 10466 }, { "epoch": 0.8068917668825162, "grad_norm": 3.9619832038879395, "learning_rate": 9.465658914876813e-07, "loss": 0.871, "step": 10467 }, { "epoch": 0.8069688559975331, "grad_norm": 3.368314743041992, "learning_rate": 9.458350769063162e-07, "loss": 0.9105, "step": 10468 }, { "epoch": 0.8070459451125501, "grad_norm": 3.7825496196746826, "learning_rate": 9.451045150811377e-07, "loss": 0.9072, "step": 10469 }, { "epoch": 0.807123034227567, "grad_norm": 4.548655033111572, "learning_rate": 9.443742060576916e-07, "loss": 0.8509, "step": 10470 }, { "epoch": 0.8072001233425841, "grad_norm": 3.4712092876434326, "learning_rate": 9.436441498815086e-07, "loss": 0.8334, "step": 10471 }, { "epoch": 0.807277212457601, "grad_norm": 3.7380151748657227, "learning_rate": 9.42914346598105e-07, "loss": 0.8441, "step": 10472 }, { "epoch": 0.8073543015726179, "grad_norm": 4.4421467781066895, "learning_rate": 9.421847962529806e-07, "loss": 0.8698, "step": 10473 }, { "epoch": 0.8074313906876349, "grad_norm": 3.492043972015381, "learning_rate": 9.414554988916175e-07, "loss": 0.86, "step": 10474 }, { "epoch": 0.8075084798026518, "grad_norm": 3.6429760456085205, "learning_rate": 9.407264545594841e-07, "loss": 0.9613, "step": 10475 }, { "epoch": 0.8075855689176689, "grad_norm": 4.198061466217041, "learning_rate": 9.399976633020325e-07, "loss": 1.0213, "step": 10476 }, { "epoch": 0.8076626580326858, "grad_norm": 4.003988265991211, "learning_rate": 9.392691251646991e-07, "loss": 0.9604, "step": 10477 }, { "epoch": 0.8077397471477027, "grad_norm": 3.8301596641540527, "learning_rate": 9.385408401929052e-07, "loss": 0.9308, "step": 10478 }, { "epoch": 0.8078168362627197, "grad_norm": 3.498955726623535, "learning_rate": 9.378128084320559e-07, "loss": 0.9101, "step": 10479 }, { "epoch": 0.8078939253777366, "grad_norm": 3.979250907897949, "learning_rate": 9.370850299275386e-07, "loss": 0.976, "step": 10480 }, { "epoch": 0.8079710144927537, "grad_norm": 3.989082098007202, "learning_rate": 9.36357504724727e-07, "loss": 0.9461, "step": 10481 }, { "epoch": 0.8080481036077706, "grad_norm": 3.72397518157959, "learning_rate": 9.356302328689787e-07, "loss": 0.9482, "step": 10482 }, { "epoch": 0.8081251927227875, "grad_norm": 3.821753740310669, "learning_rate": 9.349032144056358e-07, "loss": 0.9482, "step": 10483 }, { "epoch": 0.8082022818378045, 
"grad_norm": 3.7636606693267822, "learning_rate": 9.341764493800248e-07, "loss": 0.7877, "step": 10484 }, { "epoch": 0.8082793709528214, "grad_norm": 3.7938175201416016, "learning_rate": 9.334499378374534e-07, "loss": 0.8539, "step": 10485 }, { "epoch": 0.8083564600678385, "grad_norm": 3.736424446105957, "learning_rate": 9.327236798232176e-07, "loss": 0.9258, "step": 10486 }, { "epoch": 0.8084335491828554, "grad_norm": 3.6965887546539307, "learning_rate": 9.319976753825949e-07, "loss": 0.871, "step": 10487 }, { "epoch": 0.8085106382978723, "grad_norm": 3.8846166133880615, "learning_rate": 9.312719245608487e-07, "loss": 0.9277, "step": 10488 }, { "epoch": 0.8085877274128893, "grad_norm": 4.221765518188477, "learning_rate": 9.305464274032256e-07, "loss": 0.9001, "step": 10489 }, { "epoch": 0.8086648165279062, "grad_norm": 3.9422693252563477, "learning_rate": 9.298211839549576e-07, "loss": 0.9941, "step": 10490 }, { "epoch": 0.8087419056429233, "grad_norm": 3.6819467544555664, "learning_rate": 9.290961942612576e-07, "loss": 0.855, "step": 10491 }, { "epoch": 0.8088189947579402, "grad_norm": 3.909057140350342, "learning_rate": 9.283714583673264e-07, "loss": 0.8636, "step": 10492 }, { "epoch": 0.8088960838729571, "grad_norm": 3.8165695667266846, "learning_rate": 9.276469763183471e-07, "loss": 0.9215, "step": 10493 }, { "epoch": 0.8089731729879741, "grad_norm": 3.7066540718078613, "learning_rate": 9.269227481594872e-07, "loss": 0.8403, "step": 10494 }, { "epoch": 0.809050262102991, "grad_norm": 3.6968142986297607, "learning_rate": 9.261987739359007e-07, "loss": 0.9137, "step": 10495 }, { "epoch": 0.809127351218008, "grad_norm": 3.936868906021118, "learning_rate": 9.254750536927204e-07, "loss": 1.0582, "step": 10496 }, { "epoch": 0.809204440333025, "grad_norm": 3.842881441116333, "learning_rate": 9.247515874750679e-07, "loss": 0.9131, "step": 10497 }, { "epoch": 0.809281529448042, "grad_norm": 3.542695999145508, "learning_rate": 9.240283753280477e-07, "loss": 0.7972, "step": 10498 }, { "epoch": 0.8093586185630589, "grad_norm": 3.8389735221862793, "learning_rate": 9.233054172967481e-07, "loss": 0.9359, "step": 10499 }, { "epoch": 0.8094357076780758, "grad_norm": 3.6747260093688965, "learning_rate": 9.225827134262422e-07, "loss": 0.901, "step": 10500 }, { "epoch": 0.8095127967930928, "grad_norm": 4.066951751708984, "learning_rate": 9.218602637615882e-07, "loss": 0.8817, "step": 10501 }, { "epoch": 0.8095898859081098, "grad_norm": 3.7741777896881104, "learning_rate": 9.211380683478238e-07, "loss": 1.0278, "step": 10502 }, { "epoch": 0.8096669750231268, "grad_norm": 4.318873882293701, "learning_rate": 9.204161272299761e-07, "loss": 1.0011, "step": 10503 }, { "epoch": 0.8097440641381437, "grad_norm": 3.677009105682373, "learning_rate": 9.196944404530544e-07, "loss": 0.8841, "step": 10504 }, { "epoch": 0.8098211532531606, "grad_norm": 3.3423690795898438, "learning_rate": 9.189730080620512e-07, "loss": 0.7182, "step": 10505 }, { "epoch": 0.8098982423681776, "grad_norm": 3.752957344055176, "learning_rate": 9.182518301019466e-07, "loss": 0.9169, "step": 10506 }, { "epoch": 0.8099753314831946, "grad_norm": 4.208918571472168, "learning_rate": 9.175309066176991e-07, "loss": 0.9648, "step": 10507 }, { "epoch": 0.8100524205982116, "grad_norm": 3.9163053035736084, "learning_rate": 9.168102376542559e-07, "loss": 0.8975, "step": 10508 }, { "epoch": 0.8101295097132285, "grad_norm": 3.690941333770752, "learning_rate": 9.160898232565468e-07, "loss": 0.8554, "step": 10509 }, { "epoch": 0.8102065988282454, 
"grad_norm": 3.9642856121063232, "learning_rate": 9.153696634694865e-07, "loss": 1.0219, "step": 10510 }, { "epoch": 0.8102836879432624, "grad_norm": 3.2848901748657227, "learning_rate": 9.146497583379737e-07, "loss": 0.8234, "step": 10511 }, { "epoch": 0.8103607770582794, "grad_norm": 3.8563730716705322, "learning_rate": 9.139301079068891e-07, "loss": 0.9495, "step": 10512 }, { "epoch": 0.8104378661732964, "grad_norm": 3.7783915996551514, "learning_rate": 9.132107122210998e-07, "loss": 0.9717, "step": 10513 }, { "epoch": 0.8105149552883133, "grad_norm": 3.5279428958892822, "learning_rate": 9.124915713254551e-07, "loss": 0.9075, "step": 10514 }, { "epoch": 0.8105920444033302, "grad_norm": 3.8966267108917236, "learning_rate": 9.117726852647924e-07, "loss": 0.9536, "step": 10515 }, { "epoch": 0.8106691335183472, "grad_norm": 3.5992579460144043, "learning_rate": 9.110540540839307e-07, "loss": 0.866, "step": 10516 }, { "epoch": 0.8107462226333642, "grad_norm": 3.7475366592407227, "learning_rate": 9.103356778276701e-07, "loss": 0.9517, "step": 10517 }, { "epoch": 0.8108233117483812, "grad_norm": 3.9139578342437744, "learning_rate": 9.096175565407994e-07, "loss": 0.9181, "step": 10518 }, { "epoch": 0.8109004008633981, "grad_norm": 3.3899145126342773, "learning_rate": 9.088996902680891e-07, "loss": 0.8865, "step": 10519 }, { "epoch": 0.810977489978415, "grad_norm": 3.5167202949523926, "learning_rate": 9.08182079054295e-07, "loss": 0.9024, "step": 10520 }, { "epoch": 0.811054579093432, "grad_norm": 4.085640907287598, "learning_rate": 9.074647229441575e-07, "loss": 0.9606, "step": 10521 }, { "epoch": 0.811131668208449, "grad_norm": 3.6280179023742676, "learning_rate": 9.067476219823973e-07, "loss": 0.963, "step": 10522 }, { "epoch": 0.811208757323466, "grad_norm": 4.181948184967041, "learning_rate": 9.060307762137233e-07, "loss": 0.9805, "step": 10523 }, { "epoch": 0.8112858464384829, "grad_norm": 3.7412025928497314, "learning_rate": 9.053141856828274e-07, "loss": 0.888, "step": 10524 }, { "epoch": 0.8113629355534998, "grad_norm": 3.9258925914764404, "learning_rate": 9.04597850434385e-07, "loss": 1.0001, "step": 10525 }, { "epoch": 0.8114400246685168, "grad_norm": 3.6146883964538574, "learning_rate": 9.038817705130559e-07, "loss": 0.9586, "step": 10526 }, { "epoch": 0.8115171137835338, "grad_norm": 3.679476499557495, "learning_rate": 9.031659459634856e-07, "loss": 0.9259, "step": 10527 }, { "epoch": 0.8115942028985508, "grad_norm": 4.017477512359619, "learning_rate": 9.024503768302989e-07, "loss": 0.9798, "step": 10528 }, { "epoch": 0.8116712920135677, "grad_norm": 3.543410301208496, "learning_rate": 9.017350631581095e-07, "loss": 0.9361, "step": 10529 }, { "epoch": 0.8117483811285846, "grad_norm": 3.8175089359283447, "learning_rate": 9.010200049915141e-07, "loss": 0.851, "step": 10530 }, { "epoch": 0.8118254702436016, "grad_norm": 4.348031997680664, "learning_rate": 9.003052023750913e-07, "loss": 0.9564, "step": 10531 }, { "epoch": 0.8119025593586185, "grad_norm": 3.7295446395874023, "learning_rate": 8.995906553534084e-07, "loss": 0.8679, "step": 10532 }, { "epoch": 0.8119796484736356, "grad_norm": 3.961146116256714, "learning_rate": 8.988763639710097e-07, "loss": 0.9047, "step": 10533 }, { "epoch": 0.8120567375886525, "grad_norm": 3.5502161979675293, "learning_rate": 8.981623282724295e-07, "loss": 0.9201, "step": 10534 }, { "epoch": 0.8121338267036694, "grad_norm": 4.238675594329834, "learning_rate": 8.974485483021844e-07, "loss": 0.8612, "step": 10535 }, { "epoch": 0.8122109158186864, 
"grad_norm": 3.8428282737731934, "learning_rate": 8.967350241047745e-07, "loss": 0.887, "step": 10536 }, { "epoch": 0.8122880049337033, "grad_norm": 3.5076427459716797, "learning_rate": 8.960217557246842e-07, "loss": 0.9022, "step": 10537 }, { "epoch": 0.8123650940487204, "grad_norm": 3.7073769569396973, "learning_rate": 8.953087432063839e-07, "loss": 0.9396, "step": 10538 }, { "epoch": 0.8124421831637373, "grad_norm": 4.131918430328369, "learning_rate": 8.945959865943238e-07, "loss": 0.9772, "step": 10539 }, { "epoch": 0.8125192722787542, "grad_norm": 3.5746655464172363, "learning_rate": 8.938834859329415e-07, "loss": 0.8473, "step": 10540 }, { "epoch": 0.8125963613937712, "grad_norm": 3.607383966445923, "learning_rate": 8.931712412666571e-07, "loss": 0.9258, "step": 10541 }, { "epoch": 0.8126734505087881, "grad_norm": 3.5877065658569336, "learning_rate": 8.924592526398762e-07, "loss": 0.8664, "step": 10542 }, { "epoch": 0.8127505396238052, "grad_norm": 3.302738904953003, "learning_rate": 8.917475200969889e-07, "loss": 0.7732, "step": 10543 }, { "epoch": 0.8128276287388221, "grad_norm": 3.6982762813568115, "learning_rate": 8.910360436823651e-07, "loss": 0.9946, "step": 10544 }, { "epoch": 0.812904717853839, "grad_norm": 3.758657932281494, "learning_rate": 8.903248234403633e-07, "loss": 0.8554, "step": 10545 }, { "epoch": 0.812981806968856, "grad_norm": 3.797997236251831, "learning_rate": 8.896138594153242e-07, "loss": 0.9052, "step": 10546 }, { "epoch": 0.8130588960838729, "grad_norm": 4.33210563659668, "learning_rate": 8.889031516515729e-07, "loss": 1.022, "step": 10547 }, { "epoch": 0.81313598519889, "grad_norm": 3.558067560195923, "learning_rate": 8.881927001934177e-07, "loss": 0.829, "step": 10548 }, { "epoch": 0.8132130743139069, "grad_norm": 3.977452516555786, "learning_rate": 8.874825050851532e-07, "loss": 0.9614, "step": 10549 }, { "epoch": 0.8132901634289238, "grad_norm": 4.086385250091553, "learning_rate": 8.867725663710547e-07, "loss": 1.0238, "step": 10550 }, { "epoch": 0.8133672525439408, "grad_norm": 4.283471584320068, "learning_rate": 8.860628840953833e-07, "loss": 0.8701, "step": 10551 }, { "epoch": 0.8134443416589577, "grad_norm": 4.118513584136963, "learning_rate": 8.853534583023843e-07, "loss": 0.9455, "step": 10552 }, { "epoch": 0.8135214307739748, "grad_norm": 3.710076332092285, "learning_rate": 8.846442890362872e-07, "loss": 0.8362, "step": 10553 }, { "epoch": 0.8135985198889917, "grad_norm": 3.5066654682159424, "learning_rate": 8.839353763413056e-07, "loss": 0.8795, "step": 10554 }, { "epoch": 0.8136756090040086, "grad_norm": 3.774261236190796, "learning_rate": 8.832267202616346e-07, "loss": 0.9621, "step": 10555 }, { "epoch": 0.8137526981190256, "grad_norm": 3.7309882640838623, "learning_rate": 8.825183208414562e-07, "loss": 0.9751, "step": 10556 }, { "epoch": 0.8138297872340425, "grad_norm": 3.934528112411499, "learning_rate": 8.818101781249355e-07, "loss": 0.9044, "step": 10557 }, { "epoch": 0.8139068763490596, "grad_norm": 3.5573508739471436, "learning_rate": 8.811022921562218e-07, "loss": 0.8235, "step": 10558 }, { "epoch": 0.8139839654640765, "grad_norm": 3.88106632232666, "learning_rate": 8.803946629794475e-07, "loss": 0.9063, "step": 10559 }, { "epoch": 0.8140610545790934, "grad_norm": 3.713273048400879, "learning_rate": 8.796872906387299e-07, "loss": 0.8801, "step": 10560 }, { "epoch": 0.8141381436941104, "grad_norm": 3.7265689373016357, "learning_rate": 8.789801751781707e-07, "loss": 0.9106, "step": 10561 }, { "epoch": 0.8142152328091273, "grad_norm": 
3.56809401512146, "learning_rate": 8.782733166418539e-07, "loss": 0.9087, "step": 10562 }, { "epoch": 0.8142923219241444, "grad_norm": 4.446261405944824, "learning_rate": 8.775667150738487e-07, "loss": 0.9448, "step": 10563 }, { "epoch": 0.8143694110391613, "grad_norm": 3.8217625617980957, "learning_rate": 8.768603705182094e-07, "loss": 0.8059, "step": 10564 }, { "epoch": 0.8144465001541782, "grad_norm": 3.5717344284057617, "learning_rate": 8.761542830189706e-07, "loss": 0.859, "step": 10565 }, { "epoch": 0.8145235892691952, "grad_norm": 3.8214683532714844, "learning_rate": 8.754484526201546e-07, "loss": 0.8514, "step": 10566 }, { "epoch": 0.8146006783842121, "grad_norm": 4.17495059967041, "learning_rate": 8.747428793657658e-07, "loss": 0.9882, "step": 10567 }, { "epoch": 0.8146777674992292, "grad_norm": 4.129594326019287, "learning_rate": 8.740375632997927e-07, "loss": 0.9762, "step": 10568 }, { "epoch": 0.8147548566142461, "grad_norm": 3.8099358081817627, "learning_rate": 8.733325044662106e-07, "loss": 0.9268, "step": 10569 }, { "epoch": 0.814831945729263, "grad_norm": 4.046210765838623, "learning_rate": 8.726277029089725e-07, "loss": 0.8863, "step": 10570 }, { "epoch": 0.81490903484428, "grad_norm": 4.233992099761963, "learning_rate": 8.719231586720211e-07, "loss": 0.9588, "step": 10571 }, { "epoch": 0.8149861239592969, "grad_norm": 3.290083646774292, "learning_rate": 8.712188717992814e-07, "loss": 0.8304, "step": 10572 }, { "epoch": 0.815063213074314, "grad_norm": 3.646782875061035, "learning_rate": 8.70514842334661e-07, "loss": 0.9994, "step": 10573 }, { "epoch": 0.8151403021893309, "grad_norm": 3.9229447841644287, "learning_rate": 8.69811070322053e-07, "loss": 0.8786, "step": 10574 }, { "epoch": 0.8152173913043478, "grad_norm": 3.7923243045806885, "learning_rate": 8.691075558053353e-07, "loss": 0.936, "step": 10575 }, { "epoch": 0.8152944804193648, "grad_norm": 3.350618600845337, "learning_rate": 8.684042988283659e-07, "loss": 0.858, "step": 10576 }, { "epoch": 0.8153715695343817, "grad_norm": 4.026613235473633, "learning_rate": 8.677012994349904e-07, "loss": 0.9224, "step": 10577 }, { "epoch": 0.8154486586493987, "grad_norm": 3.9146225452423096, "learning_rate": 8.669985576690371e-07, "loss": 0.8832, "step": 10578 }, { "epoch": 0.8155257477644157, "grad_norm": 3.648346185684204, "learning_rate": 8.662960735743181e-07, "loss": 0.7847, "step": 10579 }, { "epoch": 0.8156028368794326, "grad_norm": 3.5210020542144775, "learning_rate": 8.655938471946313e-07, "loss": 0.8185, "step": 10580 }, { "epoch": 0.8156799259944496, "grad_norm": 4.143526554107666, "learning_rate": 8.648918785737542e-07, "loss": 0.8891, "step": 10581 }, { "epoch": 0.8157570151094665, "grad_norm": 3.710387706756592, "learning_rate": 8.641901677554526e-07, "loss": 0.8862, "step": 10582 }, { "epoch": 0.8158341042244835, "grad_norm": 3.5736660957336426, "learning_rate": 8.634887147834736e-07, "loss": 0.8868, "step": 10583 }, { "epoch": 0.8159111933395005, "grad_norm": 3.8351974487304688, "learning_rate": 8.6278751970155e-07, "loss": 0.9472, "step": 10584 }, { "epoch": 0.8159882824545174, "grad_norm": 3.8771169185638428, "learning_rate": 8.620865825533975e-07, "loss": 0.9296, "step": 10585 }, { "epoch": 0.8160653715695344, "grad_norm": 3.649178981781006, "learning_rate": 8.613859033827165e-07, "loss": 0.9505, "step": 10586 }, { "epoch": 0.8161424606845513, "grad_norm": 3.742682456970215, "learning_rate": 8.606854822331895e-07, "loss": 0.8974, "step": 10587 }, { "epoch": 0.8162195497995683, "grad_norm": 4.2910237312316895, 
"learning_rate": 8.599853191484842e-07, "loss": 0.9299, "step": 10588 }, { "epoch": 0.8162966389145853, "grad_norm": 3.860732078552246, "learning_rate": 8.592854141722523e-07, "loss": 1.0105, "step": 10589 }, { "epoch": 0.8163737280296022, "grad_norm": 3.7548787593841553, "learning_rate": 8.585857673481302e-07, "loss": 0.9132, "step": 10590 }, { "epoch": 0.8164508171446192, "grad_norm": 3.929503917694092, "learning_rate": 8.578863787197372e-07, "loss": 0.9521, "step": 10591 }, { "epoch": 0.8165279062596361, "grad_norm": 4.102705955505371, "learning_rate": 8.571872483306748e-07, "loss": 0.9715, "step": 10592 }, { "epoch": 0.8166049953746531, "grad_norm": 3.499938488006592, "learning_rate": 8.564883762245313e-07, "loss": 0.9841, "step": 10593 }, { "epoch": 0.8166820844896701, "grad_norm": 4.210318565368652, "learning_rate": 8.55789762444878e-07, "loss": 0.9848, "step": 10594 }, { "epoch": 0.816759173604687, "grad_norm": 3.527986526489258, "learning_rate": 8.550914070352695e-07, "loss": 0.9264, "step": 10595 }, { "epoch": 0.816836262719704, "grad_norm": 3.987180471420288, "learning_rate": 8.54393310039246e-07, "loss": 0.9992, "step": 10596 }, { "epoch": 0.8169133518347209, "grad_norm": 3.61505389213562, "learning_rate": 8.536954715003276e-07, "loss": 0.8168, "step": 10597 }, { "epoch": 0.8169904409497379, "grad_norm": 3.735048294067383, "learning_rate": 8.529978914620219e-07, "loss": 0.8828, "step": 10598 }, { "epoch": 0.8170675300647549, "grad_norm": 4.112254619598389, "learning_rate": 8.523005699678205e-07, "loss": 0.9597, "step": 10599 }, { "epoch": 0.8171446191797718, "grad_norm": 4.099547386169434, "learning_rate": 8.516035070611967e-07, "loss": 0.9305, "step": 10600 }, { "epoch": 0.8172217082947888, "grad_norm": 3.74941086769104, "learning_rate": 8.509067027856093e-07, "loss": 0.8205, "step": 10601 }, { "epoch": 0.8172987974098057, "grad_norm": 3.4929075241088867, "learning_rate": 8.502101571845001e-07, "loss": 0.9547, "step": 10602 }, { "epoch": 0.8173758865248227, "grad_norm": 4.058771133422852, "learning_rate": 8.495138703012957e-07, "loss": 0.983, "step": 10603 }, { "epoch": 0.8174529756398397, "grad_norm": 3.9835104942321777, "learning_rate": 8.488178421794047e-07, "loss": 0.9632, "step": 10604 }, { "epoch": 0.8175300647548566, "grad_norm": 4.225916862487793, "learning_rate": 8.481220728622225e-07, "loss": 0.9406, "step": 10605 }, { "epoch": 0.8176071538698736, "grad_norm": 3.349649429321289, "learning_rate": 8.474265623931272e-07, "loss": 0.7666, "step": 10606 }, { "epoch": 0.8176842429848905, "grad_norm": 3.722669839859009, "learning_rate": 8.467313108154773e-07, "loss": 0.939, "step": 10607 }, { "epoch": 0.8177613320999075, "grad_norm": 3.4461019039154053, "learning_rate": 8.4603631817262e-07, "loss": 0.8237, "step": 10608 }, { "epoch": 0.8178384212149244, "grad_norm": 3.774502992630005, "learning_rate": 8.453415845078844e-07, "loss": 0.8583, "step": 10609 }, { "epoch": 0.8179155103299414, "grad_norm": 3.614598512649536, "learning_rate": 8.446471098645831e-07, "loss": 0.9551, "step": 10610 }, { "epoch": 0.8179925994449584, "grad_norm": 3.7169055938720703, "learning_rate": 8.439528942860137e-07, "loss": 0.8629, "step": 10611 }, { "epoch": 0.8180696885599753, "grad_norm": 3.7345728874206543, "learning_rate": 8.432589378154582e-07, "loss": 0.9106, "step": 10612 }, { "epoch": 0.8181467776749923, "grad_norm": 4.463329315185547, "learning_rate": 8.425652404961781e-07, "loss": 0.9058, "step": 10613 }, { "epoch": 0.8182238667900092, "grad_norm": 3.3270933628082275, "learning_rate": 
8.418718023714235e-07, "loss": 0.8002, "step": 10614 }, { "epoch": 0.8183009559050262, "grad_norm": 3.8927876949310303, "learning_rate": 8.41178623484426e-07, "loss": 0.8976, "step": 10615 }, { "epoch": 0.8183780450200432, "grad_norm": 3.9593920707702637, "learning_rate": 8.404857038784026e-07, "loss": 0.9744, "step": 10616 }, { "epoch": 0.8184551341350601, "grad_norm": 3.8429269790649414, "learning_rate": 8.39793043596554e-07, "loss": 0.8321, "step": 10617 }, { "epoch": 0.8185322232500771, "grad_norm": 3.442072629928589, "learning_rate": 8.391006426820619e-07, "loss": 0.8208, "step": 10618 }, { "epoch": 0.818609312365094, "grad_norm": 3.8619630336761475, "learning_rate": 8.384085011780946e-07, "loss": 0.8541, "step": 10619 }, { "epoch": 0.818686401480111, "grad_norm": 3.815845489501953, "learning_rate": 8.377166191278036e-07, "loss": 0.9521, "step": 10620 }, { "epoch": 0.818763490595128, "grad_norm": 3.9525768756866455, "learning_rate": 8.370249965743249e-07, "loss": 0.9698, "step": 10621 }, { "epoch": 0.8188405797101449, "grad_norm": 3.71791934967041, "learning_rate": 8.363336335607769e-07, "loss": 1.037, "step": 10622 }, { "epoch": 0.8189176688251619, "grad_norm": 3.4764459133148193, "learning_rate": 8.356425301302639e-07, "loss": 0.8571, "step": 10623 }, { "epoch": 0.8189947579401788, "grad_norm": 4.280252933502197, "learning_rate": 8.3495168632587e-07, "loss": 1.067, "step": 10624 }, { "epoch": 0.8190718470551958, "grad_norm": 3.945054769515991, "learning_rate": 8.342611021906672e-07, "loss": 0.9154, "step": 10625 }, { "epoch": 0.8191489361702128, "grad_norm": 3.6436848640441895, "learning_rate": 8.335707777677099e-07, "loss": 0.8429, "step": 10626 }, { "epoch": 0.8192260252852297, "grad_norm": 3.7490007877349854, "learning_rate": 8.32880713100036e-07, "loss": 0.9217, "step": 10627 }, { "epoch": 0.8193031144002467, "grad_norm": 3.5114471912384033, "learning_rate": 8.321909082306684e-07, "loss": 0.9029, "step": 10628 }, { "epoch": 0.8193802035152636, "grad_norm": 3.8217241764068604, "learning_rate": 8.315013632026114e-07, "loss": 0.861, "step": 10629 }, { "epoch": 0.8194572926302806, "grad_norm": 3.3438620567321777, "learning_rate": 8.308120780588553e-07, "loss": 0.7675, "step": 10630 }, { "epoch": 0.8195343817452976, "grad_norm": 4.367412090301514, "learning_rate": 8.301230528423726e-07, "loss": 0.934, "step": 10631 }, { "epoch": 0.8196114708603145, "grad_norm": 4.256845951080322, "learning_rate": 8.29434287596122e-07, "loss": 0.9082, "step": 10632 }, { "epoch": 0.8196885599753315, "grad_norm": 3.7340166568756104, "learning_rate": 8.287457823630429e-07, "loss": 0.9485, "step": 10633 }, { "epoch": 0.8197656490903484, "grad_norm": 3.8312957286834717, "learning_rate": 8.280575371860627e-07, "loss": 0.8524, "step": 10634 }, { "epoch": 0.8198427382053654, "grad_norm": 3.471848726272583, "learning_rate": 8.273695521080866e-07, "loss": 0.8596, "step": 10635 }, { "epoch": 0.8199198273203824, "grad_norm": 3.743417978286743, "learning_rate": 8.266818271720078e-07, "loss": 0.984, "step": 10636 }, { "epoch": 0.8199969164353993, "grad_norm": 3.6192376613616943, "learning_rate": 8.259943624207034e-07, "loss": 0.9746, "step": 10637 }, { "epoch": 0.8200740055504163, "grad_norm": 3.581130027770996, "learning_rate": 8.253071578970329e-07, "loss": 0.8544, "step": 10638 }, { "epoch": 0.8201510946654332, "grad_norm": 3.57409405708313, "learning_rate": 8.246202136438408e-07, "loss": 0.7728, "step": 10639 }, { "epoch": 0.8202281837804501, "grad_norm": 3.536517858505249, "learning_rate": 
8.239335297039525e-07, "loss": 0.8483, "step": 10640 }, { "epoch": 0.8203052728954672, "grad_norm": 3.8591723442077637, "learning_rate": 8.232471061201797e-07, "loss": 0.9938, "step": 10641 }, { "epoch": 0.8203823620104841, "grad_norm": 3.6102797985076904, "learning_rate": 8.225609429353187e-07, "loss": 0.9334, "step": 10642 }, { "epoch": 0.8204594511255011, "grad_norm": 3.6389827728271484, "learning_rate": 8.218750401921466e-07, "loss": 0.8946, "step": 10643 }, { "epoch": 0.820536540240518, "grad_norm": 3.4284937381744385, "learning_rate": 8.211893979334284e-07, "loss": 0.7989, "step": 10644 }, { "epoch": 0.820613629355535, "grad_norm": 3.6757824420928955, "learning_rate": 8.205040162019073e-07, "loss": 0.8724, "step": 10645 }, { "epoch": 0.820690718470552, "grad_norm": 4.049697399139404, "learning_rate": 8.198188950403146e-07, "loss": 0.9266, "step": 10646 }, { "epoch": 0.8207678075855689, "grad_norm": 3.7433371543884277, "learning_rate": 8.191340344913629e-07, "loss": 0.9927, "step": 10647 }, { "epoch": 0.8208448967005859, "grad_norm": 3.705571174621582, "learning_rate": 8.184494345977517e-07, "loss": 0.8741, "step": 10648 }, { "epoch": 0.8209219858156028, "grad_norm": 3.779428482055664, "learning_rate": 8.177650954021632e-07, "loss": 0.9699, "step": 10649 }, { "epoch": 0.8209990749306197, "grad_norm": 3.6336560249328613, "learning_rate": 8.170810169472593e-07, "loss": 0.8423, "step": 10650 }, { "epoch": 0.8210761640456368, "grad_norm": 3.8812954425811768, "learning_rate": 8.163971992756897e-07, "loss": 0.9858, "step": 10651 }, { "epoch": 0.8211532531606537, "grad_norm": 3.7961621284484863, "learning_rate": 8.157136424300877e-07, "loss": 0.9314, "step": 10652 }, { "epoch": 0.8212303422756707, "grad_norm": 3.7296488285064697, "learning_rate": 8.15030346453069e-07, "loss": 1.0059, "step": 10653 }, { "epoch": 0.8213074313906876, "grad_norm": 3.9131686687469482, "learning_rate": 8.143473113872353e-07, "loss": 0.8374, "step": 10654 }, { "epoch": 0.8213845205057045, "grad_norm": 4.030622482299805, "learning_rate": 8.136645372751667e-07, "loss": 0.8713, "step": 10655 }, { "epoch": 0.8214616096207216, "grad_norm": 3.774033308029175, "learning_rate": 8.129820241594333e-07, "loss": 0.9483, "step": 10656 }, { "epoch": 0.8215386987357385, "grad_norm": 3.8504068851470947, "learning_rate": 8.122997720825853e-07, "loss": 1.045, "step": 10657 }, { "epoch": 0.8216157878507555, "grad_norm": 3.6778900623321533, "learning_rate": 8.116177810871578e-07, "loss": 0.8602, "step": 10658 }, { "epoch": 0.8216928769657724, "grad_norm": 3.6927058696746826, "learning_rate": 8.109360512156695e-07, "loss": 0.9418, "step": 10659 }, { "epoch": 0.8217699660807893, "grad_norm": 3.3587496280670166, "learning_rate": 8.10254582510624e-07, "loss": 0.8898, "step": 10660 }, { "epoch": 0.8218470551958064, "grad_norm": 3.8647491931915283, "learning_rate": 8.095733750145046e-07, "loss": 0.9488, "step": 10661 }, { "epoch": 0.8219241443108233, "grad_norm": 3.846730947494507, "learning_rate": 8.088924287697824e-07, "loss": 0.9386, "step": 10662 }, { "epoch": 0.8220012334258403, "grad_norm": 3.8304007053375244, "learning_rate": 8.082117438189113e-07, "loss": 0.9066, "step": 10663 }, { "epoch": 0.8220783225408572, "grad_norm": 3.793449878692627, "learning_rate": 8.075313202043277e-07, "loss": 0.8526, "step": 10664 }, { "epoch": 0.8221554116558741, "grad_norm": 4.21845006942749, "learning_rate": 8.068511579684546e-07, "loss": 1.0152, "step": 10665 }, { "epoch": 0.8222325007708912, "grad_norm": 3.6791608333587646, "learning_rate": 
8.06171257153694e-07, "loss": 0.9214, "step": 10666 }, { "epoch": 0.8223095898859081, "grad_norm": 3.7897164821624756, "learning_rate": 8.054916178024347e-07, "loss": 0.9361, "step": 10667 }, { "epoch": 0.8223866790009251, "grad_norm": 3.942296266555786, "learning_rate": 8.04812239957049e-07, "loss": 1.037, "step": 10668 }, { "epoch": 0.822463768115942, "grad_norm": 3.920706272125244, "learning_rate": 8.041331236598926e-07, "loss": 0.8321, "step": 10669 }, { "epoch": 0.822540857230959, "grad_norm": 3.6586873531341553, "learning_rate": 8.034542689533053e-07, "loss": 0.9324, "step": 10670 }, { "epoch": 0.822617946345976, "grad_norm": 3.7215545177459717, "learning_rate": 8.027756758796107e-07, "loss": 0.884, "step": 10671 }, { "epoch": 0.8226950354609929, "grad_norm": 3.4907851219177246, "learning_rate": 8.020973444811142e-07, "loss": 0.801, "step": 10672 }, { "epoch": 0.8227721245760099, "grad_norm": 3.6691055297851562, "learning_rate": 8.014192748001059e-07, "loss": 0.9102, "step": 10673 }, { "epoch": 0.8228492136910268, "grad_norm": 3.8015921115875244, "learning_rate": 8.007414668788616e-07, "loss": 0.924, "step": 10674 }, { "epoch": 0.8229263028060438, "grad_norm": 3.784499168395996, "learning_rate": 8.000639207596377e-07, "loss": 0.928, "step": 10675 }, { "epoch": 0.8230033919210608, "grad_norm": 3.817927122116089, "learning_rate": 7.993866364846781e-07, "loss": 0.892, "step": 10676 }, { "epoch": 0.8230804810360777, "grad_norm": 3.8663864135742188, "learning_rate": 7.987096140962047e-07, "loss": 0.8445, "step": 10677 }, { "epoch": 0.8231575701510947, "grad_norm": 4.127407073974609, "learning_rate": 7.980328536364279e-07, "loss": 0.914, "step": 10678 }, { "epoch": 0.8232346592661116, "grad_norm": 3.5715925693511963, "learning_rate": 7.9735635514754e-07, "loss": 0.924, "step": 10679 }, { "epoch": 0.8233117483811286, "grad_norm": 3.3841607570648193, "learning_rate": 7.966801186717172e-07, "loss": 0.805, "step": 10680 }, { "epoch": 0.8233888374961456, "grad_norm": 3.75689435005188, "learning_rate": 7.960041442511196e-07, "loss": 0.8059, "step": 10681 }, { "epoch": 0.8234659266111625, "grad_norm": 3.8247830867767334, "learning_rate": 7.95328431927892e-07, "loss": 0.9118, "step": 10682 }, { "epoch": 0.8235430157261795, "grad_norm": 4.0702385902404785, "learning_rate": 7.946529817441584e-07, "loss": 0.9658, "step": 10683 }, { "epoch": 0.8236201048411964, "grad_norm": 3.605431318283081, "learning_rate": 7.939777937420317e-07, "loss": 0.9196, "step": 10684 }, { "epoch": 0.8236971939562134, "grad_norm": 3.7044453620910645, "learning_rate": 7.933028679636057e-07, "loss": 0.908, "step": 10685 }, { "epoch": 0.8237742830712304, "grad_norm": 3.886737585067749, "learning_rate": 7.926282044509593e-07, "loss": 0.9069, "step": 10686 }, { "epoch": 0.8238513721862473, "grad_norm": 3.6313858032226562, "learning_rate": 7.919538032461544e-07, "loss": 0.9743, "step": 10687 }, { "epoch": 0.8239284613012643, "grad_norm": 3.607703924179077, "learning_rate": 7.912796643912352e-07, "loss": 0.8587, "step": 10688 }, { "epoch": 0.8240055504162812, "grad_norm": 3.6826751232147217, "learning_rate": 7.906057879282314e-07, "loss": 0.9056, "step": 10689 }, { "epoch": 0.8240826395312982, "grad_norm": 4.050000190734863, "learning_rate": 7.89932173899155e-07, "loss": 0.8867, "step": 10690 }, { "epoch": 0.8241597286463151, "grad_norm": 3.8793132305145264, "learning_rate": 7.89258822346004e-07, "loss": 0.8438, "step": 10691 }, { "epoch": 0.8242368177613321, "grad_norm": 3.8853375911712646, "learning_rate": 7.885857333107566e-07, 
"loss": 0.8774, "step": 10692 }, { "epoch": 0.8243139068763491, "grad_norm": 3.8986237049102783, "learning_rate": 7.87912906835378e-07, "loss": 0.9329, "step": 10693 }, { "epoch": 0.824390995991366, "grad_norm": 3.8146276473999023, "learning_rate": 7.872403429618141e-07, "loss": 0.9111, "step": 10694 }, { "epoch": 0.824468085106383, "grad_norm": 3.530385732650757, "learning_rate": 7.865680417319965e-07, "loss": 0.9361, "step": 10695 }, { "epoch": 0.8245451742214, "grad_norm": 3.7008519172668457, "learning_rate": 7.858960031878399e-07, "loss": 0.905, "step": 10696 }, { "epoch": 0.8246222633364169, "grad_norm": 3.490631103515625, "learning_rate": 7.852242273712429e-07, "loss": 0.8935, "step": 10697 }, { "epoch": 0.8246993524514339, "grad_norm": 4.064510345458984, "learning_rate": 7.845527143240856e-07, "loss": 0.93, "step": 10698 }, { "epoch": 0.8247764415664508, "grad_norm": 3.9456470012664795, "learning_rate": 7.838814640882342e-07, "loss": 0.9109, "step": 10699 }, { "epoch": 0.8248535306814678, "grad_norm": 4.111172676086426, "learning_rate": 7.832104767055376e-07, "loss": 0.9342, "step": 10700 }, { "epoch": 0.8249306197964847, "grad_norm": 3.4403626918792725, "learning_rate": 7.82539752217828e-07, "loss": 0.972, "step": 10701 }, { "epoch": 0.8250077089115017, "grad_norm": 3.8715031147003174, "learning_rate": 7.81869290666924e-07, "loss": 0.7921, "step": 10702 }, { "epoch": 0.8250847980265187, "grad_norm": 3.712979793548584, "learning_rate": 7.811990920946216e-07, "loss": 1.0237, "step": 10703 }, { "epoch": 0.8251618871415356, "grad_norm": 3.841836452484131, "learning_rate": 7.805291565427065e-07, "loss": 1.0109, "step": 10704 }, { "epoch": 0.8252389762565526, "grad_norm": 3.964392900466919, "learning_rate": 7.79859484052945e-07, "loss": 0.8749, "step": 10705 }, { "epoch": 0.8253160653715695, "grad_norm": 3.964123010635376, "learning_rate": 7.791900746670883e-07, "loss": 0.9281, "step": 10706 }, { "epoch": 0.8253931544865865, "grad_norm": 3.7393078804016113, "learning_rate": 7.785209284268702e-07, "loss": 0.8826, "step": 10707 }, { "epoch": 0.8254702436016035, "grad_norm": 3.9628257751464844, "learning_rate": 7.778520453740096e-07, "loss": 0.9923, "step": 10708 }, { "epoch": 0.8255473327166204, "grad_norm": 3.574497699737549, "learning_rate": 7.771834255502059e-07, "loss": 0.8425, "step": 10709 }, { "epoch": 0.8256244218316374, "grad_norm": 3.801563262939453, "learning_rate": 7.765150689971451e-07, "loss": 0.8815, "step": 10710 }, { "epoch": 0.8257015109466543, "grad_norm": 3.759230613708496, "learning_rate": 7.758469757564957e-07, "loss": 0.8957, "step": 10711 }, { "epoch": 0.8257786000616713, "grad_norm": 3.4054291248321533, "learning_rate": 7.751791458699099e-07, "loss": 0.8231, "step": 10712 }, { "epoch": 0.8258556891766883, "grad_norm": 3.554560422897339, "learning_rate": 7.745115793790247e-07, "loss": 0.9277, "step": 10713 }, { "epoch": 0.8259327782917052, "grad_norm": 3.6641149520874023, "learning_rate": 7.738442763254573e-07, "loss": 0.9879, "step": 10714 }, { "epoch": 0.8260098674067222, "grad_norm": 3.7249083518981934, "learning_rate": 7.731772367508111e-07, "loss": 0.8303, "step": 10715 }, { "epoch": 0.8260869565217391, "grad_norm": 3.779303789138794, "learning_rate": 7.725104606966727e-07, "loss": 0.9721, "step": 10716 }, { "epoch": 0.826164045636756, "grad_norm": 3.8608760833740234, "learning_rate": 7.718439482046125e-07, "loss": 0.871, "step": 10717 }, { "epoch": 0.8262411347517731, "grad_norm": 3.9529666900634766, "learning_rate": 7.711776993161841e-07, "loss": 1.0389, 
"step": 10718 }, { "epoch": 0.82631822386679, "grad_norm": 3.6020145416259766, "learning_rate": 7.705117140729257e-07, "loss": 0.7875, "step": 10719 }, { "epoch": 0.826395312981807, "grad_norm": 4.178860664367676, "learning_rate": 7.698459925163554e-07, "loss": 1.0956, "step": 10720 }, { "epoch": 0.8264724020968239, "grad_norm": 3.685406446456909, "learning_rate": 7.691805346879794e-07, "loss": 0.9837, "step": 10721 }, { "epoch": 0.8265494912118408, "grad_norm": 3.7086429595947266, "learning_rate": 7.685153406292845e-07, "loss": 1.0095, "step": 10722 }, { "epoch": 0.8266265803268579, "grad_norm": 3.816329002380371, "learning_rate": 7.67850410381743e-07, "loss": 0.9181, "step": 10723 }, { "epoch": 0.8267036694418748, "grad_norm": 3.774265766143799, "learning_rate": 7.671857439868107e-07, "loss": 0.8757, "step": 10724 }, { "epoch": 0.8267807585568918, "grad_norm": 4.03645133972168, "learning_rate": 7.665213414859235e-07, "loss": 0.9412, "step": 10725 }, { "epoch": 0.8268578476719087, "grad_norm": 3.7689404487609863, "learning_rate": 7.658572029205052e-07, "loss": 0.8735, "step": 10726 }, { "epoch": 0.8269349367869256, "grad_norm": 3.5467817783355713, "learning_rate": 7.651933283319613e-07, "loss": 0.8445, "step": 10727 }, { "epoch": 0.8270120259019427, "grad_norm": 4.0023088455200195, "learning_rate": 7.645297177616806e-07, "loss": 0.9186, "step": 10728 }, { "epoch": 0.8270891150169596, "grad_norm": 3.534449577331543, "learning_rate": 7.638663712510358e-07, "loss": 0.8886, "step": 10729 }, { "epoch": 0.8271662041319766, "grad_norm": 3.6895134449005127, "learning_rate": 7.632032888413848e-07, "loss": 0.798, "step": 10730 }, { "epoch": 0.8272432932469935, "grad_norm": 3.8588826656341553, "learning_rate": 7.625404705740641e-07, "loss": 0.8937, "step": 10731 }, { "epoch": 0.8273203823620104, "grad_norm": 3.6557419300079346, "learning_rate": 7.618779164903988e-07, "loss": 0.8956, "step": 10732 }, { "epoch": 0.8273974714770275, "grad_norm": 3.7859385013580322, "learning_rate": 7.612156266316962e-07, "loss": 0.927, "step": 10733 }, { "epoch": 0.8274745605920444, "grad_norm": 3.476879358291626, "learning_rate": 7.60553601039245e-07, "loss": 0.869, "step": 10734 }, { "epoch": 0.8275516497070614, "grad_norm": 3.6959054470062256, "learning_rate": 7.598918397543209e-07, "loss": 0.8605, "step": 10735 }, { "epoch": 0.8276287388220783, "grad_norm": 3.761538505554199, "learning_rate": 7.592303428181802e-07, "loss": 0.9357, "step": 10736 }, { "epoch": 0.8277058279370952, "grad_norm": 3.5777299404144287, "learning_rate": 7.585691102720643e-07, "loss": 0.8868, "step": 10737 }, { "epoch": 0.8277829170521123, "grad_norm": 3.6994411945343018, "learning_rate": 7.579081421571976e-07, "loss": 0.8553, "step": 10738 }, { "epoch": 0.8278600061671292, "grad_norm": 3.478304862976074, "learning_rate": 7.572474385147888e-07, "loss": 0.8501, "step": 10739 }, { "epoch": 0.8279370952821462, "grad_norm": 3.7842917442321777, "learning_rate": 7.56586999386027e-07, "loss": 0.7966, "step": 10740 }, { "epoch": 0.8280141843971631, "grad_norm": 3.9170117378234863, "learning_rate": 7.559268248120887e-07, "loss": 1.0096, "step": 10741 }, { "epoch": 0.82809127351218, "grad_norm": 3.4382944107055664, "learning_rate": 7.55266914834133e-07, "loss": 0.8735, "step": 10742 }, { "epoch": 0.8281683626271971, "grad_norm": 3.6370790004730225, "learning_rate": 7.546072694933004e-07, "loss": 0.9112, "step": 10743 }, { "epoch": 0.828245451742214, "grad_norm": 4.061718463897705, "learning_rate": 7.539478888307173e-07, "loss": 1.021, "step": 10744 }, 
{ "epoch": 0.828322540857231, "grad_norm": 3.542180061340332, "learning_rate": 7.532887728874933e-07, "loss": 0.8531, "step": 10745 }, { "epoch": 0.8283996299722479, "grad_norm": 3.725696325302124, "learning_rate": 7.526299217047195e-07, "loss": 1.0043, "step": 10746 }, { "epoch": 0.8284767190872648, "grad_norm": 3.6887924671173096, "learning_rate": 7.51971335323472e-07, "loss": 0.9136, "step": 10747 }, { "epoch": 0.8285538082022819, "grad_norm": 3.769517660140991, "learning_rate": 7.513130137848101e-07, "loss": 0.8516, "step": 10748 }, { "epoch": 0.8286308973172988, "grad_norm": 3.488903522491455, "learning_rate": 7.506549571297783e-07, "loss": 0.7989, "step": 10749 }, { "epoch": 0.8287079864323158, "grad_norm": 3.627612352371216, "learning_rate": 7.499971653994026e-07, "loss": 0.8622, "step": 10750 }, { "epoch": 0.8287850755473327, "grad_norm": 3.5904018878936768, "learning_rate": 7.493396386346913e-07, "loss": 0.8568, "step": 10751 }, { "epoch": 0.8288621646623496, "grad_norm": 3.8833725452423096, "learning_rate": 7.486823768766388e-07, "loss": 0.8596, "step": 10752 }, { "epoch": 0.8289392537773667, "grad_norm": 3.913623809814453, "learning_rate": 7.480253801662219e-07, "loss": 1.0065, "step": 10753 }, { "epoch": 0.8290163428923836, "grad_norm": 3.8650975227355957, "learning_rate": 7.473686485444009e-07, "loss": 0.8731, "step": 10754 }, { "epoch": 0.8290934320074006, "grad_norm": 3.8849239349365234, "learning_rate": 7.467121820521194e-07, "loss": 0.9002, "step": 10755 }, { "epoch": 0.8291705211224175, "grad_norm": 3.877471685409546, "learning_rate": 7.46055980730307e-07, "loss": 0.8938, "step": 10756 }, { "epoch": 0.8292476102374344, "grad_norm": 3.7837424278259277, "learning_rate": 7.454000446198706e-07, "loss": 0.9011, "step": 10757 }, { "epoch": 0.8293246993524515, "grad_norm": 3.6548290252685547, "learning_rate": 7.447443737617066e-07, "loss": 0.9058, "step": 10758 }, { "epoch": 0.8294017884674684, "grad_norm": 3.6717207431793213, "learning_rate": 7.440889681966923e-07, "loss": 0.9047, "step": 10759 }, { "epoch": 0.8294788775824854, "grad_norm": 3.3943278789520264, "learning_rate": 7.434338279656889e-07, "loss": 0.8617, "step": 10760 }, { "epoch": 0.8295559666975023, "grad_norm": 3.89233660697937, "learning_rate": 7.427789531095425e-07, "loss": 0.9176, "step": 10761 }, { "epoch": 0.8296330558125192, "grad_norm": 3.8601460456848145, "learning_rate": 7.421243436690778e-07, "loss": 0.8272, "step": 10762 }, { "epoch": 0.8297101449275363, "grad_norm": 3.9159624576568604, "learning_rate": 7.414699996851088e-07, "loss": 0.977, "step": 10763 }, { "epoch": 0.8297872340425532, "grad_norm": 3.4939470291137695, "learning_rate": 7.408159211984294e-07, "loss": 0.8306, "step": 10764 }, { "epoch": 0.8298643231575702, "grad_norm": 3.7393009662628174, "learning_rate": 7.401621082498189e-07, "loss": 0.9836, "step": 10765 }, { "epoch": 0.8299414122725871, "grad_norm": 3.700958013534546, "learning_rate": 7.395085608800384e-07, "loss": 0.854, "step": 10766 }, { "epoch": 0.830018501387604, "grad_norm": 4.00097131729126, "learning_rate": 7.388552791298343e-07, "loss": 0.9573, "step": 10767 }, { "epoch": 0.830095590502621, "grad_norm": 3.7124149799346924, "learning_rate": 7.382022630399338e-07, "loss": 0.925, "step": 10768 }, { "epoch": 0.830172679617638, "grad_norm": 3.643216371536255, "learning_rate": 7.375495126510496e-07, "loss": 0.8348, "step": 10769 }, { "epoch": 0.830249768732655, "grad_norm": 11.0379638671875, "learning_rate": 7.368970280038778e-07, "loss": 0.8255, "step": 10770 }, { "epoch": 
0.8303268578476719, "grad_norm": 3.5740654468536377, "learning_rate": 7.362448091390966e-07, "loss": 0.8926, "step": 10771 }, { "epoch": 0.8304039469626888, "grad_norm": 3.759594440460205, "learning_rate": 7.355928560973707e-07, "loss": 0.8353, "step": 10772 }, { "epoch": 0.8304810360777058, "grad_norm": 3.6525447368621826, "learning_rate": 7.349411689193426e-07, "loss": 0.8027, "step": 10773 }, { "epoch": 0.8305581251927228, "grad_norm": 3.7419726848602295, "learning_rate": 7.34289747645644e-07, "loss": 0.8801, "step": 10774 }, { "epoch": 0.8306352143077398, "grad_norm": 3.5162227153778076, "learning_rate": 7.336385923168865e-07, "loss": 0.8258, "step": 10775 }, { "epoch": 0.8307123034227567, "grad_norm": 3.819016933441162, "learning_rate": 7.329877029736665e-07, "loss": 0.9143, "step": 10776 }, { "epoch": 0.8307893925377736, "grad_norm": 3.756284475326538, "learning_rate": 7.323370796565637e-07, "loss": 0.9504, "step": 10777 }, { "epoch": 0.8308664816527906, "grad_norm": 3.6436846256256104, "learning_rate": 7.31686722406143e-07, "loss": 0.9968, "step": 10778 }, { "epoch": 0.8309435707678076, "grad_norm": 3.641303300857544, "learning_rate": 7.310366312629475e-07, "loss": 0.949, "step": 10779 }, { "epoch": 0.8310206598828246, "grad_norm": 3.9875218868255615, "learning_rate": 7.303868062675073e-07, "loss": 0.9923, "step": 10780 }, { "epoch": 0.8310977489978415, "grad_norm": 3.8598506450653076, "learning_rate": 7.297372474603381e-07, "loss": 1.0225, "step": 10781 }, { "epoch": 0.8311748381128584, "grad_norm": 3.6692776679992676, "learning_rate": 7.290879548819363e-07, "loss": 0.8962, "step": 10782 }, { "epoch": 0.8312519272278754, "grad_norm": 3.7915146350860596, "learning_rate": 7.284389285727799e-07, "loss": 0.953, "step": 10783 }, { "epoch": 0.8313290163428924, "grad_norm": 3.9026849269866943, "learning_rate": 7.277901685733335e-07, "loss": 0.9768, "step": 10784 }, { "epoch": 0.8314061054579094, "grad_norm": 3.8057961463928223, "learning_rate": 7.271416749240435e-07, "loss": 0.9183, "step": 10785 }, { "epoch": 0.8314831945729263, "grad_norm": 3.617309808731079, "learning_rate": 7.264934476653401e-07, "loss": 0.9085, "step": 10786 }, { "epoch": 0.8315602836879432, "grad_norm": 3.680267572402954, "learning_rate": 7.258454868376385e-07, "loss": 0.8003, "step": 10787 }, { "epoch": 0.8316373728029602, "grad_norm": 3.6309967041015625, "learning_rate": 7.251977924813336e-07, "loss": 0.9443, "step": 10788 }, { "epoch": 0.8317144619179772, "grad_norm": 3.5176992416381836, "learning_rate": 7.245503646368063e-07, "loss": 0.8685, "step": 10789 }, { "epoch": 0.8317915510329942, "grad_norm": 3.7374002933502197, "learning_rate": 7.239032033444205e-07, "loss": 0.8479, "step": 10790 }, { "epoch": 0.8318686401480111, "grad_norm": 4.19711446762085, "learning_rate": 7.232563086445238e-07, "loss": 0.9312, "step": 10791 }, { "epoch": 0.831945729263028, "grad_norm": 3.7935373783111572, "learning_rate": 7.226096805774463e-07, "loss": 1.0013, "step": 10792 }, { "epoch": 0.832022818378045, "grad_norm": 3.825516700744629, "learning_rate": 7.21963319183503e-07, "loss": 0.8755, "step": 10793 }, { "epoch": 0.832099907493062, "grad_norm": 3.986665725708008, "learning_rate": 7.213172245029892e-07, "loss": 0.889, "step": 10794 }, { "epoch": 0.832176996608079, "grad_norm": 3.7849297523498535, "learning_rate": 7.206713965761864e-07, "loss": 0.9522, "step": 10795 }, { "epoch": 0.8322540857230959, "grad_norm": 3.8678035736083984, "learning_rate": 7.200258354433593e-07, "loss": 0.9428, "step": 10796 }, { "epoch": 
0.8323311748381128, "grad_norm": 3.5105371475219727, "learning_rate": 7.193805411447546e-07, "loss": 0.7586, "step": 10797 }, { "epoch": 0.8324082639531298, "grad_norm": 4.333858966827393, "learning_rate": 7.187355137206042e-07, "loss": 0.8917, "step": 10798 }, { "epoch": 0.8324853530681467, "grad_norm": 3.7516279220581055, "learning_rate": 7.180907532111203e-07, "loss": 0.9158, "step": 10799 }, { "epoch": 0.8325624421831638, "grad_norm": 3.8405489921569824, "learning_rate": 7.174462596565012e-07, "loss": 0.9124, "step": 10800 }, { "epoch": 0.8326395312981807, "grad_norm": 4.030445098876953, "learning_rate": 7.168020330969283e-07, "loss": 0.9262, "step": 10801 }, { "epoch": 0.8327166204131976, "grad_norm": 4.261043548583984, "learning_rate": 7.161580735725648e-07, "loss": 1.0228, "step": 10802 }, { "epoch": 0.8327937095282146, "grad_norm": 4.2174973487854, "learning_rate": 7.155143811235593e-07, "loss": 1.0124, "step": 10803 }, { "epoch": 0.8328707986432315, "grad_norm": 4.092016696929932, "learning_rate": 7.14870955790043e-07, "loss": 0.8879, "step": 10804 }, { "epoch": 0.8329478877582486, "grad_norm": 3.6013119220733643, "learning_rate": 7.142277976121287e-07, "loss": 0.8734, "step": 10805 }, { "epoch": 0.8330249768732655, "grad_norm": 4.047834396362305, "learning_rate": 7.135849066299144e-07, "loss": 0.9181, "step": 10806 }, { "epoch": 0.8331020659882824, "grad_norm": 3.696688413619995, "learning_rate": 7.129422828834809e-07, "loss": 0.8078, "step": 10807 }, { "epoch": 0.8331791551032994, "grad_norm": 3.918166399002075, "learning_rate": 7.122999264128933e-07, "loss": 0.8543, "step": 10808 }, { "epoch": 0.8332562442183163, "grad_norm": 3.7116942405700684, "learning_rate": 7.116578372581995e-07, "loss": 0.8861, "step": 10809 }, { "epoch": 0.8333333333333334, "grad_norm": 3.7015819549560547, "learning_rate": 7.110160154594287e-07, "loss": 0.8536, "step": 10810 }, { "epoch": 0.8334104224483503, "grad_norm": 3.7466604709625244, "learning_rate": 7.10374461056596e-07, "loss": 0.9696, "step": 10811 }, { "epoch": 0.8334875115633672, "grad_norm": 3.6178178787231445, "learning_rate": 7.097331740896995e-07, "loss": 0.9173, "step": 10812 }, { "epoch": 0.8335646006783842, "grad_norm": 3.8105711936950684, "learning_rate": 7.090921545987195e-07, "loss": 0.9118, "step": 10813 }, { "epoch": 0.8336416897934011, "grad_norm": 3.8142049312591553, "learning_rate": 7.0845140262362e-07, "loss": 0.7892, "step": 10814 }, { "epoch": 0.8337187789084182, "grad_norm": 3.651261329650879, "learning_rate": 7.078109182043508e-07, "loss": 0.9293, "step": 10815 }, { "epoch": 0.8337958680234351, "grad_norm": 3.8389205932617188, "learning_rate": 7.071707013808399e-07, "loss": 0.8746, "step": 10816 }, { "epoch": 0.833872957138452, "grad_norm": 4.682302951812744, "learning_rate": 7.065307521930026e-07, "loss": 0.8675, "step": 10817 }, { "epoch": 0.833950046253469, "grad_norm": 3.9559519290924072, "learning_rate": 7.058910706807359e-07, "loss": 0.9481, "step": 10818 }, { "epoch": 0.8340271353684859, "grad_norm": 3.810323715209961, "learning_rate": 7.052516568839218e-07, "loss": 0.9161, "step": 10819 }, { "epoch": 0.834104224483503, "grad_norm": 4.13281774520874, "learning_rate": 7.046125108424245e-07, "loss": 0.9867, "step": 10820 }, { "epoch": 0.8341813135985199, "grad_norm": 3.446863889694214, "learning_rate": 7.039736325960899e-07, "loss": 0.8842, "step": 10821 }, { "epoch": 0.8342584027135368, "grad_norm": 3.840688943862915, "learning_rate": 7.033350221847496e-07, "loss": 0.9211, "step": 10822 }, { "epoch": 
0.8343354918285538, "grad_norm": 3.8792099952697754, "learning_rate": 7.026966796482177e-07, "loss": 0.8376, "step": 10823 }, { "epoch": 0.8344125809435707, "grad_norm": 3.9304957389831543, "learning_rate": 7.020586050262912e-07, "loss": 0.919, "step": 10824 }, { "epoch": 0.8344896700585878, "grad_norm": 3.9042816162109375, "learning_rate": 7.014207983587517e-07, "loss": 0.8255, "step": 10825 }, { "epoch": 0.8345667591736047, "grad_norm": 3.54953670501709, "learning_rate": 7.00783259685362e-07, "loss": 0.8494, "step": 10826 }, { "epoch": 0.8346438482886216, "grad_norm": 3.699927568435669, "learning_rate": 7.0014598904587e-07, "loss": 0.7763, "step": 10827 }, { "epoch": 0.8347209374036386, "grad_norm": 3.908418655395508, "learning_rate": 6.995089864800059e-07, "loss": 0.9824, "step": 10828 }, { "epoch": 0.8347980265186555, "grad_norm": 3.541356325149536, "learning_rate": 6.988722520274838e-07, "loss": 0.7651, "step": 10829 }, { "epoch": 0.8348751156336726, "grad_norm": 3.6384029388427734, "learning_rate": 6.98235785728002e-07, "loss": 0.9051, "step": 10830 }, { "epoch": 0.8349522047486895, "grad_norm": 3.667262315750122, "learning_rate": 6.975995876212383e-07, "loss": 0.9101, "step": 10831 }, { "epoch": 0.8350292938637064, "grad_norm": 3.559422492980957, "learning_rate": 6.969636577468575e-07, "loss": 0.8595, "step": 10832 }, { "epoch": 0.8351063829787234, "grad_norm": 4.325561046600342, "learning_rate": 6.963279961445068e-07, "loss": 0.9312, "step": 10833 }, { "epoch": 0.8351834720937403, "grad_norm": 4.272604465484619, "learning_rate": 6.956926028538163e-07, "loss": 0.8992, "step": 10834 }, { "epoch": 0.8352605612087574, "grad_norm": 3.4892845153808594, "learning_rate": 6.950574779144004e-07, "loss": 0.8594, "step": 10835 }, { "epoch": 0.8353376503237743, "grad_norm": 3.5458579063415527, "learning_rate": 6.944226213658534e-07, "loss": 0.9747, "step": 10836 }, { "epoch": 0.8354147394387912, "grad_norm": 3.427591562271118, "learning_rate": 6.937880332477576e-07, "loss": 0.9044, "step": 10837 }, { "epoch": 0.8354918285538082, "grad_norm": 4.049854755401611, "learning_rate": 6.931537135996747e-07, "loss": 0.9564, "step": 10838 }, { "epoch": 0.8355689176688251, "grad_norm": 3.7413408756256104, "learning_rate": 6.925196624611525e-07, "loss": 0.9341, "step": 10839 }, { "epoch": 0.8356460067838422, "grad_norm": 3.709747076034546, "learning_rate": 6.918858798717204e-07, "loss": 0.9402, "step": 10840 }, { "epoch": 0.8357230958988591, "grad_norm": 4.0322465896606445, "learning_rate": 6.912523658708919e-07, "loss": 0.91, "step": 10841 }, { "epoch": 0.8358001850138761, "grad_norm": 3.899702310562134, "learning_rate": 6.906191204981621e-07, "loss": 0.9262, "step": 10842 }, { "epoch": 0.835877274128893, "grad_norm": 3.4912753105163574, "learning_rate": 6.899861437930116e-07, "loss": 0.8982, "step": 10843 }, { "epoch": 0.8359543632439099, "grad_norm": 3.437044620513916, "learning_rate": 6.893534357949022e-07, "loss": 0.8238, "step": 10844 }, { "epoch": 0.836031452358927, "grad_norm": 3.966545820236206, "learning_rate": 6.887209965432812e-07, "loss": 0.977, "step": 10845 }, { "epoch": 0.8361085414739439, "grad_norm": 3.3362553119659424, "learning_rate": 6.880888260775786e-07, "loss": 0.877, "step": 10846 }, { "epoch": 0.8361856305889609, "grad_norm": 3.762795925140381, "learning_rate": 6.874569244372042e-07, "loss": 0.9176, "step": 10847 }, { "epoch": 0.8362627197039778, "grad_norm": 3.6273088455200195, "learning_rate": 6.868252916615553e-07, "loss": 0.8417, "step": 10848 }, { "epoch": 0.8363398088189947, 
"grad_norm": 3.5637712478637695, "learning_rate": 6.861939277900115e-07, "loss": 0.8944, "step": 10849 }, { "epoch": 0.8364168979340117, "grad_norm": 3.720207929611206, "learning_rate": 6.855628328619341e-07, "loss": 1.0064, "step": 10850 }, { "epoch": 0.8364939870490287, "grad_norm": 3.5952982902526855, "learning_rate": 6.849320069166693e-07, "loss": 0.9475, "step": 10851 }, { "epoch": 0.8365710761640457, "grad_norm": 3.7892141342163086, "learning_rate": 6.843014499935463e-07, "loss": 0.9439, "step": 10852 }, { "epoch": 0.8366481652790626, "grad_norm": 3.6238391399383545, "learning_rate": 6.836711621318753e-07, "loss": 0.8605, "step": 10853 }, { "epoch": 0.8367252543940795, "grad_norm": 3.9006173610687256, "learning_rate": 6.830411433709527e-07, "loss": 0.8448, "step": 10854 }, { "epoch": 0.8368023435090965, "grad_norm": 3.381910800933838, "learning_rate": 6.824113937500565e-07, "loss": 0.9104, "step": 10855 }, { "epoch": 0.8368794326241135, "grad_norm": 3.998650312423706, "learning_rate": 6.817819133084485e-07, "loss": 1.0149, "step": 10856 }, { "epoch": 0.8369565217391305, "grad_norm": 3.4496870040893555, "learning_rate": 6.811527020853748e-07, "loss": 0.8017, "step": 10857 }, { "epoch": 0.8370336108541474, "grad_norm": 3.7553465366363525, "learning_rate": 6.805237601200615e-07, "loss": 0.7632, "step": 10858 }, { "epoch": 0.8371106999691643, "grad_norm": 4.353603839874268, "learning_rate": 6.798950874517201e-07, "loss": 0.9318, "step": 10859 }, { "epoch": 0.8371877890841813, "grad_norm": 3.8863165378570557, "learning_rate": 6.792666841195455e-07, "loss": 0.904, "step": 10860 }, { "epoch": 0.8372648781991983, "grad_norm": 3.8300702571868896, "learning_rate": 6.786385501627157e-07, "loss": 0.8406, "step": 10861 }, { "epoch": 0.8373419673142153, "grad_norm": 3.770839214324951, "learning_rate": 6.780106856203916e-07, "loss": 0.9262, "step": 10862 }, { "epoch": 0.8374190564292322, "grad_norm": 3.5766117572784424, "learning_rate": 6.77383090531718e-07, "loss": 0.868, "step": 10863 }, { "epoch": 0.8374961455442491, "grad_norm": 3.751206159591675, "learning_rate": 6.767557649358203e-07, "loss": 0.8759, "step": 10864 }, { "epoch": 0.8375732346592661, "grad_norm": 3.9734339714050293, "learning_rate": 6.7612870887181e-07, "loss": 0.8378, "step": 10865 }, { "epoch": 0.8376503237742831, "grad_norm": 3.7045814990997314, "learning_rate": 6.755019223787807e-07, "loss": 0.9453, "step": 10866 }, { "epoch": 0.8377274128893001, "grad_norm": 3.88385272026062, "learning_rate": 6.748754054958095e-07, "loss": 0.9384, "step": 10867 }, { "epoch": 0.837804502004317, "grad_norm": 4.15552282333374, "learning_rate": 6.742491582619559e-07, "loss": 0.9795, "step": 10868 }, { "epoch": 0.8378815911193339, "grad_norm": 3.2853026390075684, "learning_rate": 6.736231807162641e-07, "loss": 0.843, "step": 10869 }, { "epoch": 0.8379586802343509, "grad_norm": 3.701760768890381, "learning_rate": 6.7299747289776e-07, "loss": 0.9865, "step": 10870 }, { "epoch": 0.8380357693493679, "grad_norm": 3.7172200679779053, "learning_rate": 6.723720348454538e-07, "loss": 0.8868, "step": 10871 }, { "epoch": 0.8381128584643849, "grad_norm": 3.8276724815368652, "learning_rate": 6.717468665983384e-07, "loss": 0.9274, "step": 10872 }, { "epoch": 0.8381899475794018, "grad_norm": 3.7223312854766846, "learning_rate": 6.711219681953885e-07, "loss": 0.8895, "step": 10873 }, { "epoch": 0.8382670366944187, "grad_norm": 3.972644329071045, "learning_rate": 6.704973396755638e-07, "loss": 0.9633, "step": 10874 }, { "epoch": 0.8383441258094357, "grad_norm": 
3.7583634853363037, "learning_rate": 6.698729810778065e-07, "loss": 0.8806, "step": 10875 }, { "epoch": 0.8384212149244527, "grad_norm": 4.294461727142334, "learning_rate": 6.692488924410434e-07, "loss": 0.957, "step": 10876 }, { "epoch": 0.8384983040394697, "grad_norm": 3.7632694244384766, "learning_rate": 6.686250738041816e-07, "loss": 0.8539, "step": 10877 }, { "epoch": 0.8385753931544866, "grad_norm": 3.696632146835327, "learning_rate": 6.68001525206115e-07, "loss": 0.9736, "step": 10878 }, { "epoch": 0.8386524822695035, "grad_norm": 3.7102134227752686, "learning_rate": 6.673782466857165e-07, "loss": 0.9112, "step": 10879 }, { "epoch": 0.8387295713845205, "grad_norm": 4.029872417449951, "learning_rate": 6.667552382818449e-07, "loss": 0.9195, "step": 10880 }, { "epoch": 0.8388066604995374, "grad_norm": 4.231304168701172, "learning_rate": 6.661325000333419e-07, "loss": 0.9635, "step": 10881 }, { "epoch": 0.8388837496145545, "grad_norm": 3.9188613891601562, "learning_rate": 6.655100319790314e-07, "loss": 0.7859, "step": 10882 }, { "epoch": 0.8389608387295714, "grad_norm": 3.596547842025757, "learning_rate": 6.64887834157723e-07, "loss": 0.8746, "step": 10883 }, { "epoch": 0.8390379278445883, "grad_norm": 3.9885101318359375, "learning_rate": 6.642659066082046e-07, "loss": 0.922, "step": 10884 }, { "epoch": 0.8391150169596053, "grad_norm": 3.787560224533081, "learning_rate": 6.636442493692518e-07, "loss": 0.894, "step": 10885 }, { "epoch": 0.8391921060746222, "grad_norm": 3.884455680847168, "learning_rate": 6.630228624796215e-07, "loss": 0.9345, "step": 10886 }, { "epoch": 0.8392691951896393, "grad_norm": 3.6436421871185303, "learning_rate": 6.624017459780541e-07, "loss": 0.837, "step": 10887 }, { "epoch": 0.8393462843046562, "grad_norm": 3.4954371452331543, "learning_rate": 6.617808999032727e-07, "loss": 0.8966, "step": 10888 }, { "epoch": 0.8394233734196731, "grad_norm": 4.002804279327393, "learning_rate": 6.611603242939846e-07, "loss": 1.0232, "step": 10889 }, { "epoch": 0.8395004625346901, "grad_norm": 3.798150062561035, "learning_rate": 6.605400191888784e-07, "loss": 0.9284, "step": 10890 }, { "epoch": 0.839577551649707, "grad_norm": 3.5775742530822754, "learning_rate": 6.599199846266274e-07, "loss": 0.8892, "step": 10891 }, { "epoch": 0.8396546407647241, "grad_norm": 3.491213321685791, "learning_rate": 6.59300220645887e-07, "loss": 0.9179, "step": 10892 }, { "epoch": 0.839731729879741, "grad_norm": 3.7937722206115723, "learning_rate": 6.586807272852969e-07, "loss": 0.9093, "step": 10893 }, { "epoch": 0.8398088189947579, "grad_norm": 4.150206089019775, "learning_rate": 6.580615045834803e-07, "loss": 0.9983, "step": 10894 }, { "epoch": 0.8398859081097749, "grad_norm": 3.7913031578063965, "learning_rate": 6.574425525790407e-07, "loss": 0.8851, "step": 10895 }, { "epoch": 0.8399629972247918, "grad_norm": 3.3941149711608887, "learning_rate": 6.568238713105668e-07, "loss": 0.8295, "step": 10896 }, { "epoch": 0.8400400863398089, "grad_norm": 3.6495449542999268, "learning_rate": 6.562054608166307e-07, "loss": 0.7818, "step": 10897 }, { "epoch": 0.8401171754548258, "grad_norm": 3.510993719100952, "learning_rate": 6.555873211357872e-07, "loss": 0.8457, "step": 10898 }, { "epoch": 0.8401942645698427, "grad_norm": 3.769568920135498, "learning_rate": 6.549694523065742e-07, "loss": 0.9033, "step": 10899 }, { "epoch": 0.8402713536848597, "grad_norm": 3.4650299549102783, "learning_rate": 6.543518543675132e-07, "loss": 0.7952, "step": 10900 }, { "epoch": 0.8403484427998766, "grad_norm": 
3.828594923019409, "learning_rate": 6.537345273571061e-07, "loss": 1.0226, "step": 10901 }, { "epoch": 0.8404255319148937, "grad_norm": 4.020386695861816, "learning_rate": 6.531174713138416e-07, "loss": 0.9334, "step": 10902 }, { "epoch": 0.8405026210299106, "grad_norm": 4.063125133514404, "learning_rate": 6.525006862761895e-07, "loss": 0.8969, "step": 10903 }, { "epoch": 0.8405797101449275, "grad_norm": 4.093360900878906, "learning_rate": 6.51884172282603e-07, "loss": 1.0452, "step": 10904 }, { "epoch": 0.8406567992599445, "grad_norm": 3.666809320449829, "learning_rate": 6.512679293715208e-07, "loss": 0.9207, "step": 10905 }, { "epoch": 0.8407338883749614, "grad_norm": 3.7799835205078125, "learning_rate": 6.506519575813591e-07, "loss": 0.8436, "step": 10906 }, { "epoch": 0.8408109774899785, "grad_norm": 3.6833157539367676, "learning_rate": 6.500362569505215e-07, "loss": 0.9263, "step": 10907 }, { "epoch": 0.8408880666049954, "grad_norm": 3.6458330154418945, "learning_rate": 6.494208275173947e-07, "loss": 0.9459, "step": 10908 }, { "epoch": 0.8409651557200123, "grad_norm": 3.5508716106414795, "learning_rate": 6.488056693203471e-07, "loss": 0.7733, "step": 10909 }, { "epoch": 0.8410422448350293, "grad_norm": 3.958599805831909, "learning_rate": 6.481907823977307e-07, "loss": 0.9815, "step": 10910 }, { "epoch": 0.8411193339500462, "grad_norm": 4.111875057220459, "learning_rate": 6.47576166787881e-07, "loss": 0.9399, "step": 10911 }, { "epoch": 0.8411964230650633, "grad_norm": 3.8634867668151855, "learning_rate": 6.469618225291141e-07, "loss": 0.8141, "step": 10912 }, { "epoch": 0.8412735121800802, "grad_norm": 4.0127716064453125, "learning_rate": 6.463477496597332e-07, "loss": 0.9476, "step": 10913 }, { "epoch": 0.8413506012950971, "grad_norm": 3.996286153793335, "learning_rate": 6.45733948218022e-07, "loss": 0.9159, "step": 10914 }, { "epoch": 0.8414276904101141, "grad_norm": 3.8326454162597656, "learning_rate": 6.451204182422488e-07, "loss": 0.8948, "step": 10915 }, { "epoch": 0.841504779525131, "grad_norm": 3.672430992126465, "learning_rate": 6.44507159770662e-07, "loss": 0.9338, "step": 10916 }, { "epoch": 0.8415818686401481, "grad_norm": 3.8770806789398193, "learning_rate": 6.43894172841496e-07, "loss": 0.9311, "step": 10917 }, { "epoch": 0.841658957755165, "grad_norm": 3.768202304840088, "learning_rate": 6.43281457492968e-07, "loss": 0.9567, "step": 10918 }, { "epoch": 0.8417360468701819, "grad_norm": 4.294236660003662, "learning_rate": 6.426690137632763e-07, "loss": 0.9294, "step": 10919 }, { "epoch": 0.8418131359851989, "grad_norm": 3.6829004287719727, "learning_rate": 6.420568416906059e-07, "loss": 0.9141, "step": 10920 }, { "epoch": 0.8418902251002158, "grad_norm": 4.125067710876465, "learning_rate": 6.414449413131202e-07, "loss": 0.9829, "step": 10921 }, { "epoch": 0.8419673142152329, "grad_norm": 3.662050247192383, "learning_rate": 6.408333126689686e-07, "loss": 0.9487, "step": 10922 }, { "epoch": 0.8420444033302498, "grad_norm": 4.144391059875488, "learning_rate": 6.402219557962835e-07, "loss": 0.9397, "step": 10923 }, { "epoch": 0.8421214924452667, "grad_norm": 4.148819446563721, "learning_rate": 6.396108707331794e-07, "loss": 1.0001, "step": 10924 }, { "epoch": 0.8421985815602837, "grad_norm": 3.679710865020752, "learning_rate": 6.390000575177546e-07, "loss": 0.9325, "step": 10925 }, { "epoch": 0.8422756706753006, "grad_norm": 3.7735555171966553, "learning_rate": 6.38389516188091e-07, "loss": 0.9016, "step": 10926 }, { "epoch": 0.8423527597903177, "grad_norm": 
4.009374618530273, "learning_rate": 6.377792467822502e-07, "loss": 0.9601, "step": 10927 }, { "epoch": 0.8424298489053346, "grad_norm": 3.806119441986084, "learning_rate": 6.371692493382814e-07, "loss": 0.9, "step": 10928 }, { "epoch": 0.8425069380203515, "grad_norm": 3.881964921951294, "learning_rate": 6.36559523894214e-07, "loss": 0.9717, "step": 10929 }, { "epoch": 0.8425840271353685, "grad_norm": 3.9612395763397217, "learning_rate": 6.359500704880617e-07, "loss": 0.955, "step": 10930 }, { "epoch": 0.8426611162503854, "grad_norm": 3.793069839477539, "learning_rate": 6.353408891578212e-07, "loss": 0.8759, "step": 10931 }, { "epoch": 0.8427382053654024, "grad_norm": 3.9942023754119873, "learning_rate": 6.347319799414702e-07, "loss": 0.9591, "step": 10932 }, { "epoch": 0.8428152944804194, "grad_norm": 3.749751567840576, "learning_rate": 6.341233428769722e-07, "loss": 0.8971, "step": 10933 }, { "epoch": 0.8428923835954363, "grad_norm": 3.6659231185913086, "learning_rate": 6.33514978002272e-07, "loss": 0.7227, "step": 10934 }, { "epoch": 0.8429694727104533, "grad_norm": 3.700681686401367, "learning_rate": 6.329068853552983e-07, "loss": 0.8878, "step": 10935 }, { "epoch": 0.8430465618254702, "grad_norm": 3.738607883453369, "learning_rate": 6.322990649739624e-07, "loss": 0.924, "step": 10936 }, { "epoch": 0.8431236509404872, "grad_norm": 3.6381099224090576, "learning_rate": 6.316915168961602e-07, "loss": 0.9039, "step": 10937 }, { "epoch": 0.8432007400555042, "grad_norm": 3.76474928855896, "learning_rate": 6.310842411597667e-07, "loss": 0.9657, "step": 10938 }, { "epoch": 0.8432778291705211, "grad_norm": 3.532024383544922, "learning_rate": 6.304772378026441e-07, "loss": 0.8998, "step": 10939 }, { "epoch": 0.8433549182855381, "grad_norm": 3.453786611557007, "learning_rate": 6.298705068626348e-07, "loss": 0.7852, "step": 10940 }, { "epoch": 0.843432007400555, "grad_norm": 3.8487155437469482, "learning_rate": 6.292640483775664e-07, "loss": 0.8156, "step": 10941 }, { "epoch": 0.843509096515572, "grad_norm": 3.691965341567993, "learning_rate": 6.286578623852485e-07, "loss": 0.9317, "step": 10942 }, { "epoch": 0.843586185630589, "grad_norm": 3.6872313022613525, "learning_rate": 6.280519489234721e-07, "loss": 0.8926, "step": 10943 }, { "epoch": 0.8436632747456059, "grad_norm": 3.968344211578369, "learning_rate": 6.274463080300142e-07, "loss": 0.9674, "step": 10944 }, { "epoch": 0.8437403638606229, "grad_norm": 3.671273708343506, "learning_rate": 6.268409397426323e-07, "loss": 0.8654, "step": 10945 }, { "epoch": 0.8438174529756398, "grad_norm": 3.5088984966278076, "learning_rate": 6.26235844099069e-07, "loss": 0.8773, "step": 10946 }, { "epoch": 0.8438945420906568, "grad_norm": 3.9786536693573, "learning_rate": 6.256310211370486e-07, "loss": 0.8877, "step": 10947 }, { "epoch": 0.8439716312056738, "grad_norm": 3.580746650695801, "learning_rate": 6.2502647089428e-07, "loss": 0.888, "step": 10948 }, { "epoch": 0.8440487203206907, "grad_norm": 3.8491809368133545, "learning_rate": 6.244221934084504e-07, "loss": 0.9276, "step": 10949 }, { "epoch": 0.8441258094357077, "grad_norm": 3.9452064037323, "learning_rate": 6.238181887172362e-07, "loss": 0.9588, "step": 10950 }, { "epoch": 0.8442028985507246, "grad_norm": 3.862786054611206, "learning_rate": 6.232144568582926e-07, "loss": 0.8399, "step": 10951 }, { "epoch": 0.8442799876657416, "grad_norm": 3.535684823989868, "learning_rate": 6.226109978692596e-07, "loss": 0.8093, "step": 10952 }, { "epoch": 0.8443570767807586, "grad_norm": 3.716644048690796, 
"learning_rate": 6.220078117877615e-07, "loss": 0.9213, "step": 10953 }, { "epoch": 0.8444341658957755, "grad_norm": 3.8293979167938232, "learning_rate": 6.214048986514004e-07, "loss": 0.9022, "step": 10954 }, { "epoch": 0.8445112550107925, "grad_norm": 3.8130550384521484, "learning_rate": 6.208022584977668e-07, "loss": 0.9534, "step": 10955 }, { "epoch": 0.8445883441258094, "grad_norm": 3.5550150871276855, "learning_rate": 6.201998913644319e-07, "loss": 0.8563, "step": 10956 }, { "epoch": 0.8446654332408264, "grad_norm": 3.5891706943511963, "learning_rate": 6.195977972889505e-07, "loss": 0.8518, "step": 10957 }, { "epoch": 0.8447425223558434, "grad_norm": 3.988762855529785, "learning_rate": 6.189959763088593e-07, "loss": 0.9264, "step": 10958 }, { "epoch": 0.8448196114708603, "grad_norm": 3.7621021270751953, "learning_rate": 6.183944284616794e-07, "loss": 0.8383, "step": 10959 }, { "epoch": 0.8448967005858773, "grad_norm": 3.256624460220337, "learning_rate": 6.177931537849141e-07, "loss": 0.7408, "step": 10960 }, { "epoch": 0.8449737897008942, "grad_norm": 3.6678712368011475, "learning_rate": 6.17192152316049e-07, "loss": 0.8796, "step": 10961 }, { "epoch": 0.8450508788159112, "grad_norm": 3.6658334732055664, "learning_rate": 6.165914240925547e-07, "loss": 0.9107, "step": 10962 }, { "epoch": 0.8451279679309281, "grad_norm": 3.5411484241485596, "learning_rate": 6.15990969151884e-07, "loss": 0.8073, "step": 10963 }, { "epoch": 0.8452050570459451, "grad_norm": 3.56990385055542, "learning_rate": 6.153907875314697e-07, "loss": 0.8502, "step": 10964 }, { "epoch": 0.8452821461609621, "grad_norm": 4.044010639190674, "learning_rate": 6.147908792687307e-07, "loss": 0.9423, "step": 10965 }, { "epoch": 0.845359235275979, "grad_norm": 3.776015043258667, "learning_rate": 6.141912444010694e-07, "loss": 0.9912, "step": 10966 }, { "epoch": 0.845436324390996, "grad_norm": 3.4817419052124023, "learning_rate": 6.135918829658694e-07, "loss": 0.8841, "step": 10967 }, { "epoch": 0.845513413506013, "grad_norm": 3.5754287242889404, "learning_rate": 6.129927950004988e-07, "loss": 0.8635, "step": 10968 }, { "epoch": 0.8455905026210299, "grad_norm": 3.851569414138794, "learning_rate": 6.123939805423051e-07, "loss": 1.0293, "step": 10969 }, { "epoch": 0.8456675917360469, "grad_norm": 3.6617162227630615, "learning_rate": 6.117954396286236e-07, "loss": 0.864, "step": 10970 }, { "epoch": 0.8457446808510638, "grad_norm": 3.597431182861328, "learning_rate": 6.111971722967686e-07, "loss": 0.9426, "step": 10971 }, { "epoch": 0.8458217699660808, "grad_norm": 3.592806816101074, "learning_rate": 6.105991785840398e-07, "loss": 0.9014, "step": 10972 }, { "epoch": 0.8458988590810977, "grad_norm": 3.4167540073394775, "learning_rate": 6.100014585277187e-07, "loss": 0.8378, "step": 10973 }, { "epoch": 0.8459759481961147, "grad_norm": 4.124277114868164, "learning_rate": 6.094040121650719e-07, "loss": 0.9724, "step": 10974 }, { "epoch": 0.8460530373111317, "grad_norm": 4.155192852020264, "learning_rate": 6.088068395333441e-07, "loss": 0.9636, "step": 10975 }, { "epoch": 0.8461301264261486, "grad_norm": 3.687875270843506, "learning_rate": 6.082099406697673e-07, "loss": 0.935, "step": 10976 }, { "epoch": 0.8462072155411656, "grad_norm": 3.8519952297210693, "learning_rate": 6.076133156115549e-07, "loss": 0.8827, "step": 10977 }, { "epoch": 0.8462843046561825, "grad_norm": 3.6095309257507324, "learning_rate": 6.070169643959034e-07, "loss": 0.9423, "step": 10978 }, { "epoch": 0.8463613937711995, "grad_norm": 3.734135866165161, 
"learning_rate": 6.064208870599935e-07, "loss": 0.9421, "step": 10979 }, { "epoch": 0.8464384828862165, "grad_norm": 3.8327412605285645, "learning_rate": 6.058250836409856e-07, "loss": 0.9157, "step": 10980 }, { "epoch": 0.8465155720012334, "grad_norm": 4.3431782722473145, "learning_rate": 6.052295541760256e-07, "loss": 1.0126, "step": 10981 }, { "epoch": 0.8465926611162504, "grad_norm": 3.996701717376709, "learning_rate": 6.046342987022419e-07, "loss": 0.9892, "step": 10982 }, { "epoch": 0.8466697502312673, "grad_norm": 4.109698295593262, "learning_rate": 6.04039317256745e-07, "loss": 0.9538, "step": 10983 }, { "epoch": 0.8467468393462843, "grad_norm": 3.8181276321411133, "learning_rate": 6.0344460987663e-07, "loss": 0.8675, "step": 10984 }, { "epoch": 0.8468239284613013, "grad_norm": 3.624297618865967, "learning_rate": 6.028501765989736e-07, "loss": 0.9302, "step": 10985 }, { "epoch": 0.8469010175763182, "grad_norm": 3.416637659072876, "learning_rate": 6.02256017460835e-07, "loss": 0.8451, "step": 10986 }, { "epoch": 0.8469781066913352, "grad_norm": 3.6329896450042725, "learning_rate": 6.016621324992566e-07, "loss": 0.9613, "step": 10987 }, { "epoch": 0.8470551958063521, "grad_norm": 4.0961408615112305, "learning_rate": 6.010685217512647e-07, "loss": 0.9912, "step": 10988 }, { "epoch": 0.847132284921369, "grad_norm": 3.716765880584717, "learning_rate": 6.004751852538682e-07, "loss": 0.9723, "step": 10989 }, { "epoch": 0.8472093740363861, "grad_norm": 3.594797372817993, "learning_rate": 5.998821230440588e-07, "loss": 0.8827, "step": 10990 }, { "epoch": 0.847286463151403, "grad_norm": 4.180978298187256, "learning_rate": 5.992893351588097e-07, "loss": 1.004, "step": 10991 }, { "epoch": 0.84736355226642, "grad_norm": 3.7273788452148438, "learning_rate": 5.986968216350786e-07, "loss": 0.859, "step": 10992 }, { "epoch": 0.8474406413814369, "grad_norm": 3.659938097000122, "learning_rate": 5.981045825098053e-07, "loss": 0.9083, "step": 10993 }, { "epoch": 0.8475177304964538, "grad_norm": 4.153972625732422, "learning_rate": 5.975126178199136e-07, "loss": 0.8852, "step": 10994 }, { "epoch": 0.8475948196114709, "grad_norm": 3.5449161529541016, "learning_rate": 5.969209276023091e-07, "loss": 0.7538, "step": 10995 }, { "epoch": 0.8476719087264878, "grad_norm": 3.8162269592285156, "learning_rate": 5.963295118938816e-07, "loss": 0.8447, "step": 10996 }, { "epoch": 0.8477489978415048, "grad_norm": 3.6209535598754883, "learning_rate": 5.957383707315006e-07, "loss": 0.9266, "step": 10997 }, { "epoch": 0.8478260869565217, "grad_norm": 3.3889384269714355, "learning_rate": 5.951475041520222e-07, "loss": 0.7844, "step": 10998 }, { "epoch": 0.8479031760715386, "grad_norm": 3.7911598682403564, "learning_rate": 5.945569121922834e-07, "loss": 0.876, "step": 10999 }, { "epoch": 0.8479802651865557, "grad_norm": 3.6760973930358887, "learning_rate": 5.939665948891049e-07, "loss": 0.9148, "step": 11000 }, { "epoch": 0.8480573543015726, "grad_norm": 3.681337833404541, "learning_rate": 5.9337655227929e-07, "loss": 0.9059, "step": 11001 }, { "epoch": 0.8481344434165896, "grad_norm": 3.6630451679229736, "learning_rate": 5.927867843996243e-07, "loss": 0.8611, "step": 11002 }, { "epoch": 0.8482115325316065, "grad_norm": 3.8612163066864014, "learning_rate": 5.921972912868768e-07, "loss": 0.9276, "step": 11003 }, { "epoch": 0.8482886216466234, "grad_norm": 3.941199541091919, "learning_rate": 5.916080729778e-07, "loss": 0.9488, "step": 11004 }, { "epoch": 0.8483657107616405, "grad_norm": 3.7736704349517822, "learning_rate": 
5.910191295091289e-07, "loss": 0.8789, "step": 11005 }, { "epoch": 0.8484427998766574, "grad_norm": 3.812868595123291, "learning_rate": 5.904304609175798e-07, "loss": 0.9216, "step": 11006 }, { "epoch": 0.8485198889916744, "grad_norm": 3.7537074089050293, "learning_rate": 5.89842067239853e-07, "loss": 0.8657, "step": 11007 }, { "epoch": 0.8485969781066913, "grad_norm": 3.538891553878784, "learning_rate": 5.892539485126331e-07, "loss": 0.9183, "step": 11008 }, { "epoch": 0.8486740672217082, "grad_norm": 3.7746341228485107, "learning_rate": 5.886661047725856e-07, "loss": 0.9663, "step": 11009 }, { "epoch": 0.8487511563367253, "grad_norm": 4.052391529083252, "learning_rate": 5.880785360563596e-07, "loss": 0.9895, "step": 11010 }, { "epoch": 0.8488282454517422, "grad_norm": 3.68890643119812, "learning_rate": 5.87491242400588e-07, "loss": 0.836, "step": 11011 }, { "epoch": 0.8489053345667592, "grad_norm": 3.5953805446624756, "learning_rate": 5.869042238418832e-07, "loss": 0.9303, "step": 11012 }, { "epoch": 0.8489824236817761, "grad_norm": 3.813616991043091, "learning_rate": 5.863174804168442e-07, "loss": 0.9442, "step": 11013 }, { "epoch": 0.8490595127967931, "grad_norm": 3.9077954292297363, "learning_rate": 5.857310121620513e-07, "loss": 0.9025, "step": 11014 }, { "epoch": 0.8491366019118101, "grad_norm": 4.491962909698486, "learning_rate": 5.851448191140674e-07, "loss": 0.9682, "step": 11015 }, { "epoch": 0.849213691026827, "grad_norm": 3.854057550430298, "learning_rate": 5.845589013094405e-07, "loss": 0.9459, "step": 11016 }, { "epoch": 0.849290780141844, "grad_norm": 3.7456696033477783, "learning_rate": 5.839732587846963e-07, "loss": 0.9548, "step": 11017 }, { "epoch": 0.8493678692568609, "grad_norm": 3.6205060482025146, "learning_rate": 5.833878915763485e-07, "loss": 0.9676, "step": 11018 }, { "epoch": 0.849444958371878, "grad_norm": 3.759831428527832, "learning_rate": 5.828027997208918e-07, "loss": 0.8192, "step": 11019 }, { "epoch": 0.8495220474868949, "grad_norm": 3.9140279293060303, "learning_rate": 5.822179832548025e-07, "loss": 0.9411, "step": 11020 }, { "epoch": 0.8495991366019118, "grad_norm": 3.6115550994873047, "learning_rate": 5.81633442214542e-07, "loss": 0.8644, "step": 11021 }, { "epoch": 0.8496762257169288, "grad_norm": 3.4248645305633545, "learning_rate": 5.810491766365545e-07, "loss": 0.897, "step": 11022 }, { "epoch": 0.8497533148319457, "grad_norm": 4.218714237213135, "learning_rate": 5.80465186557263e-07, "loss": 0.9441, "step": 11023 }, { "epoch": 0.8498304039469627, "grad_norm": 3.3824057579040527, "learning_rate": 5.798814720130779e-07, "loss": 0.8109, "step": 11024 }, { "epoch": 0.8499074930619797, "grad_norm": 3.555572032928467, "learning_rate": 5.792980330403908e-07, "loss": 0.9638, "step": 11025 }, { "epoch": 0.8499845821769966, "grad_norm": 3.7353179454803467, "learning_rate": 5.787148696755757e-07, "loss": 0.997, "step": 11026 }, { "epoch": 0.8500616712920136, "grad_norm": 3.5979185104370117, "learning_rate": 5.781319819549913e-07, "loss": 0.8548, "step": 11027 }, { "epoch": 0.8501387604070305, "grad_norm": 3.9905002117156982, "learning_rate": 5.775493699149754e-07, "loss": 0.9858, "step": 11028 }, { "epoch": 0.8502158495220475, "grad_norm": 3.6577179431915283, "learning_rate": 5.769670335918515e-07, "loss": 0.9216, "step": 11029 }, { "epoch": 0.8502929386370645, "grad_norm": 4.757742404937744, "learning_rate": 5.763849730219257e-07, "loss": 1.0233, "step": 11030 }, { "epoch": 0.8503700277520814, "grad_norm": 3.6999893188476562, "learning_rate": 
5.758031882414861e-07, "loss": 0.9356, "step": 11031 }, { "epoch": 0.8504471168670984, "grad_norm": 4.301717281341553, "learning_rate": 5.752216792868048e-07, "loss": 1.0229, "step": 11032 }, { "epoch": 0.8505242059821153, "grad_norm": 3.8095266819000244, "learning_rate": 5.746404461941358e-07, "loss": 0.965, "step": 11033 }, { "epoch": 0.8506012950971323, "grad_norm": 3.8716020584106445, "learning_rate": 5.740594889997147e-07, "loss": 0.8976, "step": 11034 }, { "epoch": 0.8506783842121493, "grad_norm": 3.362471103668213, "learning_rate": 5.734788077397618e-07, "loss": 0.8177, "step": 11035 }, { "epoch": 0.8507554733271662, "grad_norm": 3.7064013481140137, "learning_rate": 5.728984024504796e-07, "loss": 0.9328, "step": 11036 }, { "epoch": 0.8508325624421832, "grad_norm": 3.902492046356201, "learning_rate": 5.723182731680538e-07, "loss": 1.0963, "step": 11037 }, { "epoch": 0.8509096515572001, "grad_norm": 3.7971115112304688, "learning_rate": 5.717384199286529e-07, "loss": 0.9039, "step": 11038 }, { "epoch": 0.8509867406722171, "grad_norm": 3.7627763748168945, "learning_rate": 5.711588427684262e-07, "loss": 0.9493, "step": 11039 }, { "epoch": 0.851063829787234, "grad_norm": 3.705435037612915, "learning_rate": 5.705795417235077e-07, "loss": 0.9674, "step": 11040 }, { "epoch": 0.851140918902251, "grad_norm": 4.405561447143555, "learning_rate": 5.700005168300144e-07, "loss": 0.9634, "step": 11041 }, { "epoch": 0.851218008017268, "grad_norm": 3.6750857830047607, "learning_rate": 5.694217681240455e-07, "loss": 0.9182, "step": 11042 }, { "epoch": 0.8512950971322849, "grad_norm": 3.666693925857544, "learning_rate": 5.688432956416823e-07, "loss": 0.8737, "step": 11043 }, { "epoch": 0.8513721862473019, "grad_norm": 3.796339988708496, "learning_rate": 5.682650994189915e-07, "loss": 0.9079, "step": 11044 }, { "epoch": 0.8514492753623188, "grad_norm": 3.8772926330566406, "learning_rate": 5.676871794920169e-07, "loss": 0.8889, "step": 11045 }, { "epoch": 0.8515263644773358, "grad_norm": 3.8630027770996094, "learning_rate": 5.671095358967926e-07, "loss": 0.9146, "step": 11046 }, { "epoch": 0.8516034535923528, "grad_norm": 3.968269109725952, "learning_rate": 5.665321686693298e-07, "loss": 0.9438, "step": 11047 }, { "epoch": 0.8516805427073697, "grad_norm": 3.7893552780151367, "learning_rate": 5.659550778456258e-07, "loss": 0.9303, "step": 11048 }, { "epoch": 0.8517576318223867, "grad_norm": 3.5288009643554688, "learning_rate": 5.653782634616573e-07, "loss": 0.8899, "step": 11049 }, { "epoch": 0.8518347209374036, "grad_norm": 4.320144176483154, "learning_rate": 5.648017255533866e-07, "loss": 0.9022, "step": 11050 }, { "epoch": 0.8519118100524206, "grad_norm": 3.9753165245056152, "learning_rate": 5.642254641567579e-07, "loss": 0.8956, "step": 11051 }, { "epoch": 0.8519888991674376, "grad_norm": 3.734354257583618, "learning_rate": 5.636494793076974e-07, "loss": 0.9239, "step": 11052 }, { "epoch": 0.8520659882824545, "grad_norm": 3.6947646141052246, "learning_rate": 5.630737710421174e-07, "loss": 0.9127, "step": 11053 }, { "epoch": 0.8521430773974715, "grad_norm": 3.678510904312134, "learning_rate": 5.624983393959066e-07, "loss": 0.8578, "step": 11054 }, { "epoch": 0.8522201665124884, "grad_norm": 3.4292240142822266, "learning_rate": 5.619231844049422e-07, "loss": 0.9145, "step": 11055 }, { "epoch": 0.8522972556275054, "grad_norm": 4.060187816619873, "learning_rate": 5.613483061050818e-07, "loss": 0.9417, "step": 11056 }, { "epoch": 0.8523743447425224, "grad_norm": 3.9965662956237793, "learning_rate": 
5.607737045321666e-07, "loss": 0.7447, "step": 11057 }, { "epoch": 0.8524514338575393, "grad_norm": 3.6497106552124023, "learning_rate": 5.60199379722019e-07, "loss": 0.8786, "step": 11058 }, { "epoch": 0.8525285229725563, "grad_norm": 3.614408493041992, "learning_rate": 5.596253317104472e-07, "loss": 0.8301, "step": 11059 }, { "epoch": 0.8526056120875732, "grad_norm": 4.088712692260742, "learning_rate": 5.590515605332369e-07, "loss": 1.0084, "step": 11060 }, { "epoch": 0.8526827012025902, "grad_norm": 3.6503963470458984, "learning_rate": 5.584780662261624e-07, "loss": 0.8587, "step": 11061 }, { "epoch": 0.8527597903176072, "grad_norm": 3.714747667312622, "learning_rate": 5.579048488249766e-07, "loss": 0.9238, "step": 11062 }, { "epoch": 0.8528368794326241, "grad_norm": 4.048036098480225, "learning_rate": 5.573319083654177e-07, "loss": 0.953, "step": 11063 }, { "epoch": 0.8529139685476411, "grad_norm": 3.8897385597229004, "learning_rate": 5.56759244883206e-07, "loss": 0.8999, "step": 11064 }, { "epoch": 0.852991057662658, "grad_norm": 4.280935287475586, "learning_rate": 5.561868584140423e-07, "loss": 0.8316, "step": 11065 }, { "epoch": 0.853068146777675, "grad_norm": 3.4700443744659424, "learning_rate": 5.556147489936131e-07, "loss": 0.8984, "step": 11066 }, { "epoch": 0.853145235892692, "grad_norm": 3.7145278453826904, "learning_rate": 5.55042916657586e-07, "loss": 0.911, "step": 11067 }, { "epoch": 0.8532223250077089, "grad_norm": 3.821202278137207, "learning_rate": 5.54471361441612e-07, "loss": 1.0008, "step": 11068 }, { "epoch": 0.8532994141227259, "grad_norm": 3.5977516174316406, "learning_rate": 5.539000833813246e-07, "loss": 0.9296, "step": 11069 }, { "epoch": 0.8533765032377428, "grad_norm": 3.5324435234069824, "learning_rate": 5.533290825123411e-07, "loss": 0.7846, "step": 11070 }, { "epoch": 0.8534535923527597, "grad_norm": 3.96653151512146, "learning_rate": 5.527583588702584e-07, "loss": 0.9285, "step": 11071 }, { "epoch": 0.8535306814677768, "grad_norm": 4.007519721984863, "learning_rate": 5.521879124906587e-07, "loss": 0.961, "step": 11072 }, { "epoch": 0.8536077705827937, "grad_norm": 3.8947818279266357, "learning_rate": 5.516177434091074e-07, "loss": 0.8486, "step": 11073 }, { "epoch": 0.8536848596978107, "grad_norm": 3.4934535026550293, "learning_rate": 5.510478516611512e-07, "loss": 0.8873, "step": 11074 }, { "epoch": 0.8537619488128276, "grad_norm": 3.4623193740844727, "learning_rate": 5.504782372823203e-07, "loss": 0.9679, "step": 11075 }, { "epoch": 0.8538390379278445, "grad_norm": 3.704373836517334, "learning_rate": 5.499089003081259e-07, "loss": 0.8837, "step": 11076 }, { "epoch": 0.8539161270428616, "grad_norm": 3.9960663318634033, "learning_rate": 5.493398407740641e-07, "loss": 0.9834, "step": 11077 }, { "epoch": 0.8539932161578785, "grad_norm": 4.131714820861816, "learning_rate": 5.48771058715612e-07, "loss": 1.0009, "step": 11078 }, { "epoch": 0.8540703052728955, "grad_norm": 3.8496928215026855, "learning_rate": 5.482025541682312e-07, "loss": 0.902, "step": 11079 }, { "epoch": 0.8541473943879124, "grad_norm": 3.6283254623413086, "learning_rate": 5.476343271673651e-07, "loss": 0.9259, "step": 11080 }, { "epoch": 0.8542244835029293, "grad_norm": 3.9260733127593994, "learning_rate": 5.470663777484402e-07, "loss": 0.9205, "step": 11081 }, { "epoch": 0.8543015726179464, "grad_norm": 3.7307844161987305, "learning_rate": 5.464987059468629e-07, "loss": 0.9092, "step": 11082 }, { "epoch": 0.8543786617329633, "grad_norm": 3.9613077640533447, "learning_rate": 
5.459313117980264e-07, "loss": 0.9234, "step": 11083 }, { "epoch": 0.8544557508479803, "grad_norm": 3.670541524887085, "learning_rate": 5.45364195337304e-07, "loss": 0.9093, "step": 11084 }, { "epoch": 0.8545328399629972, "grad_norm": 3.992414951324463, "learning_rate": 5.447973566000531e-07, "loss": 0.9303, "step": 11085 }, { "epoch": 0.8546099290780141, "grad_norm": 3.7107622623443604, "learning_rate": 5.442307956216142e-07, "loss": 0.8985, "step": 11086 }, { "epoch": 0.8546870181930312, "grad_norm": 4.1595354080200195, "learning_rate": 5.436645124373069e-07, "loss": 0.906, "step": 11087 }, { "epoch": 0.8547641073080481, "grad_norm": 3.343167543411255, "learning_rate": 5.430985070824374e-07, "loss": 0.7814, "step": 11088 }, { "epoch": 0.8548411964230651, "grad_norm": 3.637519598007202, "learning_rate": 5.42532779592293e-07, "loss": 0.8609, "step": 11089 }, { "epoch": 0.854918285538082, "grad_norm": 3.568153142929077, "learning_rate": 5.419673300021427e-07, "loss": 0.9346, "step": 11090 }, { "epoch": 0.8549953746530989, "grad_norm": 3.780703067779541, "learning_rate": 5.414021583472429e-07, "loss": 0.9486, "step": 11091 }, { "epoch": 0.855072463768116, "grad_norm": 3.555079460144043, "learning_rate": 5.408372646628257e-07, "loss": 0.8887, "step": 11092 }, { "epoch": 0.8551495528831329, "grad_norm": 3.8127081394195557, "learning_rate": 5.40272648984111e-07, "loss": 0.8914, "step": 11093 }, { "epoch": 0.8552266419981499, "grad_norm": 3.8799734115600586, "learning_rate": 5.397083113462986e-07, "loss": 0.8731, "step": 11094 }, { "epoch": 0.8553037311131668, "grad_norm": 3.8874175548553467, "learning_rate": 5.391442517845724e-07, "loss": 0.8681, "step": 11095 }, { "epoch": 0.8553808202281837, "grad_norm": 3.862079381942749, "learning_rate": 5.385804703341002e-07, "loss": 0.895, "step": 11096 }, { "epoch": 0.8554579093432008, "grad_norm": 3.89817476272583, "learning_rate": 5.380169670300284e-07, "loss": 0.8392, "step": 11097 }, { "epoch": 0.8555349984582177, "grad_norm": 3.441835403442383, "learning_rate": 5.374537419074893e-07, "loss": 0.8703, "step": 11098 }, { "epoch": 0.8556120875732347, "grad_norm": 3.845632314682007, "learning_rate": 5.368907950015972e-07, "loss": 0.8489, "step": 11099 }, { "epoch": 0.8556891766882516, "grad_norm": 3.8767735958099365, "learning_rate": 5.36328126347449e-07, "loss": 0.8637, "step": 11100 }, { "epoch": 0.8557662658032685, "grad_norm": 3.9100561141967773, "learning_rate": 5.357657359801249e-07, "loss": 0.951, "step": 11101 }, { "epoch": 0.8558433549182856, "grad_norm": 3.784698486328125, "learning_rate": 5.352036239346858e-07, "loss": 0.9673, "step": 11102 }, { "epoch": 0.8559204440333025, "grad_norm": 3.8566789627075195, "learning_rate": 5.34641790246177e-07, "loss": 0.8962, "step": 11103 }, { "epoch": 0.8559975331483195, "grad_norm": 3.3040380477905273, "learning_rate": 5.340802349496254e-07, "loss": 0.8545, "step": 11104 }, { "epoch": 0.8560746222633364, "grad_norm": 3.585635185241699, "learning_rate": 5.335189580800421e-07, "loss": 0.8885, "step": 11105 }, { "epoch": 0.8561517113783533, "grad_norm": 3.8033549785614014, "learning_rate": 5.329579596724188e-07, "loss": 0.8385, "step": 11106 }, { "epoch": 0.8562288004933704, "grad_norm": 3.958906888961792, "learning_rate": 5.323972397617327e-07, "loss": 0.9254, "step": 11107 }, { "epoch": 0.8563058896083873, "grad_norm": 3.711397647857666, "learning_rate": 5.318367983829393e-07, "loss": 0.9813, "step": 11108 }, { "epoch": 0.8563829787234043, "grad_norm": 3.6522676944732666, "learning_rate": 
5.312766355709803e-07, "loss": 0.8936, "step": 11109 }, { "epoch": 0.8564600678384212, "grad_norm": 4.029238700866699, "learning_rate": 5.307167513607786e-07, "loss": 1.009, "step": 11110 }, { "epoch": 0.8565371569534381, "grad_norm": 3.7067220211029053, "learning_rate": 5.301571457872407e-07, "loss": 0.9072, "step": 11111 }, { "epoch": 0.8566142460684552, "grad_norm": 4.054270267486572, "learning_rate": 5.295978188852557e-07, "loss": 1.0588, "step": 11112 }, { "epoch": 0.8566913351834721, "grad_norm": 3.697340250015259, "learning_rate": 5.290387706896933e-07, "loss": 0.9871, "step": 11113 }, { "epoch": 0.8567684242984891, "grad_norm": 4.2902092933654785, "learning_rate": 5.284800012354075e-07, "loss": 0.8552, "step": 11114 }, { "epoch": 0.856845513413506, "grad_norm": 3.741992950439453, "learning_rate": 5.279215105572355e-07, "loss": 0.8789, "step": 11115 }, { "epoch": 0.8569226025285229, "grad_norm": 3.8144991397857666, "learning_rate": 5.273632986899951e-07, "loss": 0.8931, "step": 11116 }, { "epoch": 0.85699969164354, "grad_norm": 3.5984315872192383, "learning_rate": 5.268053656684891e-07, "loss": 0.9031, "step": 11117 }, { "epoch": 0.8570767807585569, "grad_norm": 4.40741491317749, "learning_rate": 5.262477115275022e-07, "loss": 0.9021, "step": 11118 }, { "epoch": 0.8571538698735739, "grad_norm": 3.9442696571350098, "learning_rate": 5.256903363017995e-07, "loss": 1.0166, "step": 11119 }, { "epoch": 0.8572309589885908, "grad_norm": 3.6909403800964355, "learning_rate": 5.251332400261311e-07, "loss": 0.8354, "step": 11120 }, { "epoch": 0.8573080481036077, "grad_norm": 3.769570827484131, "learning_rate": 5.24576422735229e-07, "loss": 0.9404, "step": 11121 }, { "epoch": 0.8573851372186247, "grad_norm": 3.7346904277801514, "learning_rate": 5.240198844638084e-07, "loss": 0.9651, "step": 11122 }, { "epoch": 0.8574622263336417, "grad_norm": 3.3625786304473877, "learning_rate": 5.234636252465675e-07, "loss": 0.9208, "step": 11123 }, { "epoch": 0.8575393154486587, "grad_norm": 3.8647756576538086, "learning_rate": 5.229076451181836e-07, "loss": 0.9616, "step": 11124 }, { "epoch": 0.8576164045636756, "grad_norm": 3.784053325653076, "learning_rate": 5.223519441133206e-07, "loss": 0.9683, "step": 11125 }, { "epoch": 0.8576934936786925, "grad_norm": 3.712662696838379, "learning_rate": 5.217965222666239e-07, "loss": 0.8376, "step": 11126 }, { "epoch": 0.8577705827937095, "grad_norm": 4.219189167022705, "learning_rate": 5.212413796127208e-07, "loss": 1.0494, "step": 11127 }, { "epoch": 0.8578476719087265, "grad_norm": 3.7683613300323486, "learning_rate": 5.206865161862212e-07, "loss": 0.9379, "step": 11128 }, { "epoch": 0.8579247610237435, "grad_norm": 3.439199447631836, "learning_rate": 5.201319320217196e-07, "loss": 0.8908, "step": 11129 }, { "epoch": 0.8580018501387604, "grad_norm": 3.727071762084961, "learning_rate": 5.195776271537894e-07, "loss": 0.8725, "step": 11130 }, { "epoch": 0.8580789392537773, "grad_norm": 3.9292681217193604, "learning_rate": 5.190236016169892e-07, "loss": 0.8883, "step": 11131 }, { "epoch": 0.8581560283687943, "grad_norm": 4.457842826843262, "learning_rate": 5.184698554458595e-07, "loss": 0.8266, "step": 11132 }, { "epoch": 0.8582331174838113, "grad_norm": 4.050070762634277, "learning_rate": 5.179163886749244e-07, "loss": 1.0174, "step": 11133 }, { "epoch": 0.8583102065988283, "grad_norm": 3.961498498916626, "learning_rate": 5.173632013386892e-07, "loss": 0.9666, "step": 11134 }, { "epoch": 0.8583872957138452, "grad_norm": 3.9840786457061768, "learning_rate": 
5.168102934716419e-07, "loss": 0.857, "step": 11135 }, { "epoch": 0.8584643848288621, "grad_norm": 3.528966188430786, "learning_rate": 5.162576651082541e-07, "loss": 0.9403, "step": 11136 }, { "epoch": 0.8585414739438791, "grad_norm": 3.7930431365966797, "learning_rate": 5.157053162829789e-07, "loss": 0.919, "step": 11137 }, { "epoch": 0.8586185630588961, "grad_norm": 3.9015920162200928, "learning_rate": 5.151532470302523e-07, "loss": 0.9518, "step": 11138 }, { "epoch": 0.8586956521739131, "grad_norm": 3.5874063968658447, "learning_rate": 5.146014573844943e-07, "loss": 0.8774, "step": 11139 }, { "epoch": 0.85877274128893, "grad_norm": 3.664182662963867, "learning_rate": 5.140499473801036e-07, "loss": 0.9138, "step": 11140 }, { "epoch": 0.8588498304039469, "grad_norm": 3.809180736541748, "learning_rate": 5.134987170514654e-07, "loss": 0.8837, "step": 11141 }, { "epoch": 0.8589269195189639, "grad_norm": 4.012628078460693, "learning_rate": 5.129477664329463e-07, "loss": 0.9214, "step": 11142 }, { "epoch": 0.8590040086339809, "grad_norm": 3.4249749183654785, "learning_rate": 5.123970955588947e-07, "loss": 0.6997, "step": 11143 }, { "epoch": 0.8590810977489979, "grad_norm": 3.523764133453369, "learning_rate": 5.118467044636438e-07, "loss": 0.8808, "step": 11144 }, { "epoch": 0.8591581868640148, "grad_norm": 3.6512789726257324, "learning_rate": 5.112965931815045e-07, "loss": 0.9734, "step": 11145 }, { "epoch": 0.8592352759790317, "grad_norm": 3.7311131954193115, "learning_rate": 5.107467617467754e-07, "loss": 0.961, "step": 11146 }, { "epoch": 0.8593123650940487, "grad_norm": 3.953731060028076, "learning_rate": 5.101972101937352e-07, "loss": 0.9679, "step": 11147 }, { "epoch": 0.8593894542090657, "grad_norm": 3.7409465312957764, "learning_rate": 5.096479385566455e-07, "loss": 0.9236, "step": 11148 }, { "epoch": 0.8594665433240827, "grad_norm": 3.9198498725891113, "learning_rate": 5.090989468697515e-07, "loss": 0.9617, "step": 11149 }, { "epoch": 0.8595436324390996, "grad_norm": 3.991605520248413, "learning_rate": 5.085502351672788e-07, "loss": 0.8857, "step": 11150 }, { "epoch": 0.8596207215541165, "grad_norm": 3.804913282394409, "learning_rate": 5.080018034834367e-07, "loss": 0.9456, "step": 11151 }, { "epoch": 0.8596978106691335, "grad_norm": 3.8616576194763184, "learning_rate": 5.074536518524176e-07, "loss": 0.9924, "step": 11152 }, { "epoch": 0.8597748997841504, "grad_norm": 3.636204957962036, "learning_rate": 5.06905780308396e-07, "loss": 1.0058, "step": 11153 }, { "epoch": 0.8598519888991675, "grad_norm": 4.016709327697754, "learning_rate": 5.063581888855285e-07, "loss": 0.9465, "step": 11154 }, { "epoch": 0.8599290780141844, "grad_norm": 4.032883644104004, "learning_rate": 5.058108776179555e-07, "loss": 0.9796, "step": 11155 }, { "epoch": 0.8600061671292013, "grad_norm": 3.659719705581665, "learning_rate": 5.05263846539798e-07, "loss": 0.9076, "step": 11156 }, { "epoch": 0.8600832562442183, "grad_norm": 4.1367950439453125, "learning_rate": 5.0471709568516e-07, "loss": 0.9214, "step": 11157 }, { "epoch": 0.8601603453592352, "grad_norm": 4.058650493621826, "learning_rate": 5.041706250881301e-07, "loss": 1.0593, "step": 11158 }, { "epoch": 0.8602374344742523, "grad_norm": 3.848299980163574, "learning_rate": 5.036244347827768e-07, "loss": 0.8446, "step": 11159 }, { "epoch": 0.8603145235892692, "grad_norm": 3.458164930343628, "learning_rate": 5.030785248031534e-07, "loss": 0.877, "step": 11160 }, { "epoch": 0.8603916127042861, "grad_norm": 3.6949539184570312, "learning_rate": 
5.025328951832936e-07, "loss": 0.8685, "step": 11161 }, { "epoch": 0.8604687018193031, "grad_norm": 4.433669090270996, "learning_rate": 5.019875459572143e-07, "loss": 0.9717, "step": 11162 }, { "epoch": 0.86054579093432, "grad_norm": 3.662029266357422, "learning_rate": 5.014424771589155e-07, "loss": 0.8557, "step": 11163 }, { "epoch": 0.8606228800493371, "grad_norm": 3.8234245777130127, "learning_rate": 5.008976888223799e-07, "loss": 0.9186, "step": 11164 }, { "epoch": 0.860699969164354, "grad_norm": 3.7250325679779053, "learning_rate": 5.003531809815721e-07, "loss": 0.9464, "step": 11165 }, { "epoch": 0.8607770582793709, "grad_norm": 3.8604021072387695, "learning_rate": 4.998089536704399e-07, "loss": 1.0107, "step": 11166 }, { "epoch": 0.8608541473943879, "grad_norm": 3.525672674179077, "learning_rate": 4.992650069229116e-07, "loss": 0.9176, "step": 11167 }, { "epoch": 0.8609312365094048, "grad_norm": 3.9230077266693115, "learning_rate": 4.987213407729002e-07, "loss": 0.8406, "step": 11168 }, { "epoch": 0.8610083256244219, "grad_norm": 3.6223180294036865, "learning_rate": 4.981779552543004e-07, "loss": 0.8505, "step": 11169 }, { "epoch": 0.8610854147394388, "grad_norm": 4.130772590637207, "learning_rate": 4.976348504009899e-07, "loss": 0.9977, "step": 11170 }, { "epoch": 0.8611625038544557, "grad_norm": 3.5042903423309326, "learning_rate": 4.970920262468282e-07, "loss": 0.8921, "step": 11171 }, { "epoch": 0.8612395929694727, "grad_norm": 4.355722427368164, "learning_rate": 4.965494828256573e-07, "loss": 0.8054, "step": 11172 }, { "epoch": 0.8613166820844896, "grad_norm": 3.7091667652130127, "learning_rate": 4.960072201713018e-07, "loss": 0.8937, "step": 11173 }, { "epoch": 0.8613937711995067, "grad_norm": 4.190767288208008, "learning_rate": 4.954652383175696e-07, "loss": 0.9878, "step": 11174 }, { "epoch": 0.8614708603145236, "grad_norm": 3.6260433197021484, "learning_rate": 4.949235372982503e-07, "loss": 0.9711, "step": 11175 }, { "epoch": 0.8615479494295405, "grad_norm": 3.458486318588257, "learning_rate": 4.943821171471158e-07, "loss": 0.8357, "step": 11176 }, { "epoch": 0.8616250385445575, "grad_norm": 3.68377423286438, "learning_rate": 4.93840977897922e-07, "loss": 0.8426, "step": 11177 }, { "epoch": 0.8617021276595744, "grad_norm": 3.642500877380371, "learning_rate": 4.933001195844034e-07, "loss": 0.8853, "step": 11178 }, { "epoch": 0.8617792167745915, "grad_norm": 3.628203868865967, "learning_rate": 4.927595422402826e-07, "loss": 0.9523, "step": 11179 }, { "epoch": 0.8618563058896084, "grad_norm": 3.9804065227508545, "learning_rate": 4.922192458992609e-07, "loss": 1.0091, "step": 11180 }, { "epoch": 0.8619333950046254, "grad_norm": 4.004647731781006, "learning_rate": 4.916792305950235e-07, "loss": 1.0064, "step": 11181 }, { "epoch": 0.8620104841196423, "grad_norm": 3.7954509258270264, "learning_rate": 4.911394963612359e-07, "loss": 0.919, "step": 11182 }, { "epoch": 0.8620875732346592, "grad_norm": 3.5825188159942627, "learning_rate": 4.906000432315489e-07, "loss": 0.9094, "step": 11183 }, { "epoch": 0.8621646623496763, "grad_norm": 3.406367778778076, "learning_rate": 4.900608712395943e-07, "loss": 0.917, "step": 11184 }, { "epoch": 0.8622417514646932, "grad_norm": 3.9264237880706787, "learning_rate": 4.895219804189871e-07, "loss": 0.8757, "step": 11185 }, { "epoch": 0.8623188405797102, "grad_norm": 3.8309881687164307, "learning_rate": 4.889833708033248e-07, "loss": 0.9881, "step": 11186 }, { "epoch": 0.8623959296947271, "grad_norm": 3.63254714012146, "learning_rate": 
4.884450424261849e-07, "loss": 0.8489, "step": 11187 }, { "epoch": 0.862473018809744, "grad_norm": 3.7826976776123047, "learning_rate": 4.879069953211312e-07, "loss": 0.9321, "step": 11188 }, { "epoch": 0.8625501079247611, "grad_norm": 3.793102979660034, "learning_rate": 4.873692295217076e-07, "loss": 0.9313, "step": 11189 }, { "epoch": 0.862627197039778, "grad_norm": 3.558684825897217, "learning_rate": 4.868317450614407e-07, "loss": 0.9335, "step": 11190 }, { "epoch": 0.862704286154795, "grad_norm": 4.0856194496154785, "learning_rate": 4.862945419738401e-07, "loss": 0.9869, "step": 11191 }, { "epoch": 0.8627813752698119, "grad_norm": 3.5604751110076904, "learning_rate": 4.857576202923986e-07, "loss": 0.8317, "step": 11192 }, { "epoch": 0.8628584643848288, "grad_norm": 3.5392532348632812, "learning_rate": 4.852209800505892e-07, "loss": 0.8233, "step": 11193 }, { "epoch": 0.8629355534998459, "grad_norm": 3.6630494594573975, "learning_rate": 4.846846212818684e-07, "loss": 0.8782, "step": 11194 }, { "epoch": 0.8630126426148628, "grad_norm": 3.6344716548919678, "learning_rate": 4.841485440196763e-07, "loss": 0.8732, "step": 11195 }, { "epoch": 0.8630897317298798, "grad_norm": 3.5769948959350586, "learning_rate": 4.836127482974346e-07, "loss": 0.8491, "step": 11196 }, { "epoch": 0.8631668208448967, "grad_norm": 3.778287649154663, "learning_rate": 4.830772341485479e-07, "loss": 0.9085, "step": 11197 }, { "epoch": 0.8632439099599136, "grad_norm": 3.6463840007781982, "learning_rate": 4.825420016064009e-07, "loss": 0.9255, "step": 11198 }, { "epoch": 0.8633209990749307, "grad_norm": 3.8667242527008057, "learning_rate": 4.820070507043633e-07, "loss": 0.931, "step": 11199 }, { "epoch": 0.8633980881899476, "grad_norm": 3.825806140899658, "learning_rate": 4.814723814757871e-07, "loss": 0.9954, "step": 11200 }, { "epoch": 0.8634751773049646, "grad_norm": 3.5652754306793213, "learning_rate": 4.80937993954006e-07, "loss": 0.8503, "step": 11201 }, { "epoch": 0.8635522664199815, "grad_norm": 3.5560030937194824, "learning_rate": 4.804038881723361e-07, "loss": 0.8271, "step": 11202 }, { "epoch": 0.8636293555349984, "grad_norm": 3.7864773273468018, "learning_rate": 4.798700641640768e-07, "loss": 0.8704, "step": 11203 }, { "epoch": 0.8637064446500154, "grad_norm": 3.6049001216888428, "learning_rate": 4.793365219625079e-07, "loss": 0.9203, "step": 11204 }, { "epoch": 0.8637835337650324, "grad_norm": 3.6420271396636963, "learning_rate": 4.788032616008936e-07, "loss": 0.966, "step": 11205 }, { "epoch": 0.8638606228800494, "grad_norm": 3.835549831390381, "learning_rate": 4.782702831124803e-07, "loss": 0.9633, "step": 11206 }, { "epoch": 0.8639377119950663, "grad_norm": 3.744598388671875, "learning_rate": 4.777375865304962e-07, "loss": 0.8078, "step": 11207 }, { "epoch": 0.8640148011100832, "grad_norm": 3.756577491760254, "learning_rate": 4.772051718881532e-07, "loss": 0.9566, "step": 11208 }, { "epoch": 0.8640918902251002, "grad_norm": 3.558971643447876, "learning_rate": 4.766730392186425e-07, "loss": 0.9583, "step": 11209 }, { "epoch": 0.8641689793401172, "grad_norm": 3.711453676223755, "learning_rate": 4.761411885551409e-07, "loss": 0.9485, "step": 11210 }, { "epoch": 0.8642460684551342, "grad_norm": 3.7305915355682373, "learning_rate": 4.7560961993080636e-07, "loss": 0.9207, "step": 11211 }, { "epoch": 0.8643231575701511, "grad_norm": 3.7739152908325195, "learning_rate": 4.750783333787795e-07, "loss": 0.8622, "step": 11212 }, { "epoch": 0.864400246685168, "grad_norm": 3.8151278495788574, "learning_rate": 
4.745473289321839e-07, "loss": 0.7792, "step": 11213 }, { "epoch": 0.864477335800185, "grad_norm": 3.5902225971221924, "learning_rate": 4.7401660662412477e-07, "loss": 0.9519, "step": 11214 }, { "epoch": 0.864554424915202, "grad_norm": 3.8135595321655273, "learning_rate": 4.7348616648768886e-07, "loss": 0.8904, "step": 11215 }, { "epoch": 0.864631514030219, "grad_norm": 3.9646761417388916, "learning_rate": 4.729560085559476e-07, "loss": 0.8801, "step": 11216 }, { "epoch": 0.8647086031452359, "grad_norm": 4.04872465133667, "learning_rate": 4.7242613286195227e-07, "loss": 1.0091, "step": 11217 }, { "epoch": 0.8647856922602528, "grad_norm": 3.746007204055786, "learning_rate": 4.718965394387387e-07, "loss": 0.9377, "step": 11218 }, { "epoch": 0.8648627813752698, "grad_norm": 3.77582049369812, "learning_rate": 4.7136722831932546e-07, "loss": 0.9175, "step": 11219 }, { "epoch": 0.8649398704902868, "grad_norm": 3.808149576187134, "learning_rate": 4.7083819953671007e-07, "loss": 0.8849, "step": 11220 }, { "epoch": 0.8650169596053038, "grad_norm": 3.639953851699829, "learning_rate": 4.703094531238761e-07, "loss": 0.8691, "step": 11221 }, { "epoch": 0.8650940487203207, "grad_norm": 4.364095211029053, "learning_rate": 4.6978098911378776e-07, "loss": 0.9547, "step": 11222 }, { "epoch": 0.8651711378353376, "grad_norm": 3.8730828762054443, "learning_rate": 4.6925280753939097e-07, "loss": 0.9314, "step": 11223 }, { "epoch": 0.8652482269503546, "grad_norm": 3.9587454795837402, "learning_rate": 4.687249084336182e-07, "loss": 0.9972, "step": 11224 }, { "epoch": 0.8653253160653716, "grad_norm": 3.558377504348755, "learning_rate": 4.6819729182937865e-07, "loss": 0.8813, "step": 11225 }, { "epoch": 0.8654024051803886, "grad_norm": 3.674800395965576, "learning_rate": 4.676699577595667e-07, "loss": 0.8476, "step": 11226 }, { "epoch": 0.8654794942954055, "grad_norm": 3.645923376083374, "learning_rate": 4.6714290625705983e-07, "loss": 0.9952, "step": 11227 }, { "epoch": 0.8655565834104224, "grad_norm": 4.369297981262207, "learning_rate": 4.666161373547162e-07, "loss": 0.9502, "step": 11228 }, { "epoch": 0.8656336725254394, "grad_norm": 3.8362910747528076, "learning_rate": 4.660896510853785e-07, "loss": 0.8966, "step": 11229 }, { "epoch": 0.8657107616404563, "grad_norm": 4.300778388977051, "learning_rate": 4.655634474818682e-07, "loss": 1.024, "step": 11230 }, { "epoch": 0.8657878507554734, "grad_norm": 3.7804079055786133, "learning_rate": 4.650375265769924e-07, "loss": 0.8685, "step": 11231 }, { "epoch": 0.8658649398704903, "grad_norm": 3.908895492553711, "learning_rate": 4.645118884035399e-07, "loss": 0.9242, "step": 11232 }, { "epoch": 0.8659420289855072, "grad_norm": 3.7380692958831787, "learning_rate": 4.639865329942811e-07, "loss": 0.8432, "step": 11233 }, { "epoch": 0.8660191181005242, "grad_norm": 3.6968612670898438, "learning_rate": 4.6346146038197035e-07, "loss": 0.9012, "step": 11234 }, { "epoch": 0.8660962072155411, "grad_norm": 3.667461395263672, "learning_rate": 4.6293667059934154e-07, "loss": 0.8959, "step": 11235 }, { "epoch": 0.8661732963305582, "grad_norm": 3.7316794395446777, "learning_rate": 4.624121636791129e-07, "loss": 0.9219, "step": 11236 }, { "epoch": 0.8662503854455751, "grad_norm": 3.7822744846343994, "learning_rate": 4.6188793965398493e-07, "loss": 0.9128, "step": 11237 }, { "epoch": 0.866327474560592, "grad_norm": 3.7413852214813232, "learning_rate": 4.6136399855664093e-07, "loss": 0.9516, "step": 11238 }, { "epoch": 0.866404563675609, "grad_norm": 3.792145013809204, "learning_rate": 
4.6084034041974533e-07, "loss": 0.9049, "step": 11239 }, { "epoch": 0.866481652790626, "grad_norm": 4.004141330718994, "learning_rate": 4.603169652759465e-07, "loss": 0.8256, "step": 11240 }, { "epoch": 0.866558741905643, "grad_norm": 4.334702968597412, "learning_rate": 4.5979387315787215e-07, "loss": 0.9637, "step": 11241 }, { "epoch": 0.8666358310206599, "grad_norm": 3.4571897983551025, "learning_rate": 4.592710640981352e-07, "loss": 0.8333, "step": 11242 }, { "epoch": 0.8667129201356768, "grad_norm": 3.4184558391571045, "learning_rate": 4.5874853812933107e-07, "loss": 0.8278, "step": 11243 }, { "epoch": 0.8667900092506938, "grad_norm": 3.8723700046539307, "learning_rate": 4.582262952840355e-07, "loss": 0.9484, "step": 11244 }, { "epoch": 0.8668670983657107, "grad_norm": 3.8549139499664307, "learning_rate": 4.5770433559480854e-07, "loss": 1.0091, "step": 11245 }, { "epoch": 0.8669441874807278, "grad_norm": 3.5513107776641846, "learning_rate": 4.571826590941908e-07, "loss": 0.8842, "step": 11246 }, { "epoch": 0.8670212765957447, "grad_norm": 3.3549818992614746, "learning_rate": 4.5666126581470625e-07, "loss": 0.8307, "step": 11247 }, { "epoch": 0.8670983657107616, "grad_norm": 3.714850664138794, "learning_rate": 4.561401557888606e-07, "loss": 0.8353, "step": 11248 }, { "epoch": 0.8671754548257786, "grad_norm": 3.587151527404785, "learning_rate": 4.5561932904914395e-07, "loss": 0.9133, "step": 11249 }, { "epoch": 0.8672525439407955, "grad_norm": 3.682015895843506, "learning_rate": 4.5509878562802536e-07, "loss": 0.8385, "step": 11250 }, { "epoch": 0.8673296330558126, "grad_norm": 3.6053788661956787, "learning_rate": 4.5457852555796044e-07, "loss": 0.8828, "step": 11251 }, { "epoch": 0.8674067221708295, "grad_norm": 3.85351824760437, "learning_rate": 4.5405854887138165e-07, "loss": 0.8888, "step": 11252 }, { "epoch": 0.8674838112858464, "grad_norm": 4.928849220275879, "learning_rate": 4.5353885560070867e-07, "loss": 0.8688, "step": 11253 }, { "epoch": 0.8675609004008634, "grad_norm": 3.783874034881592, "learning_rate": 4.53019445778341e-07, "loss": 0.8879, "step": 11254 }, { "epoch": 0.8676379895158803, "grad_norm": 3.7811548709869385, "learning_rate": 4.5250031943666174e-07, "loss": 0.9084, "step": 11255 }, { "epoch": 0.8677150786308974, "grad_norm": 3.439472198486328, "learning_rate": 4.5198147660803605e-07, "loss": 0.9078, "step": 11256 }, { "epoch": 0.8677921677459143, "grad_norm": 3.847801923751831, "learning_rate": 4.5146291732480975e-07, "loss": 0.9516, "step": 11257 }, { "epoch": 0.8678692568609312, "grad_norm": 3.690554141998291, "learning_rate": 4.5094464161931305e-07, "loss": 0.9484, "step": 11258 }, { "epoch": 0.8679463459759482, "grad_norm": 3.9348044395446777, "learning_rate": 4.5042664952385797e-07, "loss": 0.9123, "step": 11259 }, { "epoch": 0.8680234350909651, "grad_norm": 4.005280494689941, "learning_rate": 4.499089410707386e-07, "loss": 0.9793, "step": 11260 }, { "epoch": 0.8681005242059822, "grad_norm": 4.255978584289551, "learning_rate": 4.493915162922308e-07, "loss": 0.8457, "step": 11261 }, { "epoch": 0.8681776133209991, "grad_norm": 4.279727935791016, "learning_rate": 4.4887437522059487e-07, "loss": 0.9742, "step": 11262 }, { "epoch": 0.868254702436016, "grad_norm": 3.771867036819458, "learning_rate": 4.4835751788807e-07, "loss": 0.9787, "step": 11263 }, { "epoch": 0.868331791551033, "grad_norm": 3.949169635772705, "learning_rate": 4.4784094432687997e-07, "loss": 0.9254, "step": 11264 }, { "epoch": 0.8684088806660499, "grad_norm": 4.027673244476318, "learning_rate": 
4.4732465456923113e-07, "loss": 0.9364, "step": 11265 }, { "epoch": 0.868485969781067, "grad_norm": 3.5182044506073, "learning_rate": 4.468086486473111e-07, "loss": 1.0193, "step": 11266 }, { "epoch": 0.8685630588960839, "grad_norm": 3.757246494293213, "learning_rate": 4.462929265932897e-07, "loss": 0.9157, "step": 11267 }, { "epoch": 0.8686401480111008, "grad_norm": 3.785848379135132, "learning_rate": 4.457774884393207e-07, "loss": 0.8675, "step": 11268 }, { "epoch": 0.8687172371261178, "grad_norm": 4.203325271606445, "learning_rate": 4.452623342175383e-07, "loss": 0.9538, "step": 11269 }, { "epoch": 0.8687943262411347, "grad_norm": 3.8824243545532227, "learning_rate": 4.447474639600596e-07, "loss": 0.9154, "step": 11270 }, { "epoch": 0.8688714153561518, "grad_norm": 3.8376996517181396, "learning_rate": 4.442328776989846e-07, "loss": 0.9482, "step": 11271 }, { "epoch": 0.8689485044711687, "grad_norm": 3.8418562412261963, "learning_rate": 4.437185754663953e-07, "loss": 0.9741, "step": 11272 }, { "epoch": 0.8690255935861856, "grad_norm": 3.5321462154388428, "learning_rate": 4.432045572943544e-07, "loss": 0.885, "step": 11273 }, { "epoch": 0.8691026827012026, "grad_norm": 4.0153374671936035, "learning_rate": 4.4269082321490906e-07, "loss": 0.9149, "step": 11274 }, { "epoch": 0.8691797718162195, "grad_norm": 4.0503129959106445, "learning_rate": 4.4217737326008814e-07, "loss": 0.9256, "step": 11275 }, { "epoch": 0.8692568609312366, "grad_norm": 3.9747254848480225, "learning_rate": 4.4166420746190206e-07, "loss": 0.9465, "step": 11276 }, { "epoch": 0.8693339500462535, "grad_norm": 3.6550421714782715, "learning_rate": 4.411513258523459e-07, "loss": 0.8072, "step": 11277 }, { "epoch": 0.8694110391612704, "grad_norm": 3.5995473861694336, "learning_rate": 4.406387284633923e-07, "loss": 0.7394, "step": 11278 }, { "epoch": 0.8694881282762874, "grad_norm": 3.6607563495635986, "learning_rate": 4.4012641532700075e-07, "loss": 0.9314, "step": 11279 }, { "epoch": 0.8695652173913043, "grad_norm": 3.560035228729248, "learning_rate": 4.3961438647511066e-07, "loss": 0.8134, "step": 11280 }, { "epoch": 0.8696423065063213, "grad_norm": 3.919574022293091, "learning_rate": 4.391026419396449e-07, "loss": 0.9771, "step": 11281 }, { "epoch": 0.8697193956213383, "grad_norm": 3.7704415321350098, "learning_rate": 4.3859118175250905e-07, "loss": 0.8802, "step": 11282 }, { "epoch": 0.8697964847363552, "grad_norm": 3.845430850982666, "learning_rate": 4.3808000594558754e-07, "loss": 0.8919, "step": 11283 }, { "epoch": 0.8698735738513722, "grad_norm": 3.642606735229492, "learning_rate": 4.3756911455075155e-07, "loss": 0.8085, "step": 11284 }, { "epoch": 0.8699506629663891, "grad_norm": 3.7264983654022217, "learning_rate": 4.3705850759985127e-07, "loss": 0.9121, "step": 11285 }, { "epoch": 0.8700277520814061, "grad_norm": 3.6813814640045166, "learning_rate": 4.3654818512472106e-07, "loss": 0.8442, "step": 11286 }, { "epoch": 0.8701048411964231, "grad_norm": 4.019516468048096, "learning_rate": 4.3603814715717674e-07, "loss": 0.8202, "step": 11287 }, { "epoch": 0.87018193031144, "grad_norm": 3.6544294357299805, "learning_rate": 4.355283937290178e-07, "loss": 0.8273, "step": 11288 }, { "epoch": 0.870259019426457, "grad_norm": 3.649919271469116, "learning_rate": 4.350189248720221e-07, "loss": 0.876, "step": 11289 }, { "epoch": 0.8703361085414739, "grad_norm": 3.7568275928497314, "learning_rate": 4.3450974061795437e-07, "loss": 0.9599, "step": 11290 }, { "epoch": 0.870413197656491, "grad_norm": 3.7730679512023926, 
"learning_rate": 4.3400084099855854e-07, "loss": 0.9461, "step": 11291 }, { "epoch": 0.8704902867715079, "grad_norm": 4.046023845672607, "learning_rate": 4.334922260455626e-07, "loss": 0.9321, "step": 11292 }, { "epoch": 0.8705673758865248, "grad_norm": 3.993216037750244, "learning_rate": 4.3298389579067677e-07, "loss": 0.8991, "step": 11293 }, { "epoch": 0.8706444650015418, "grad_norm": 3.6614949703216553, "learning_rate": 4.324758502655907e-07, "loss": 0.9043, "step": 11294 }, { "epoch": 0.8707215541165587, "grad_norm": 3.688453197479248, "learning_rate": 4.3196808950197954e-07, "loss": 0.9041, "step": 11295 }, { "epoch": 0.8707986432315757, "grad_norm": 3.937119245529175, "learning_rate": 4.314606135314997e-07, "loss": 0.9584, "step": 11296 }, { "epoch": 0.8708757323465927, "grad_norm": 3.8757388591766357, "learning_rate": 4.3095342238578974e-07, "loss": 0.8873, "step": 11297 }, { "epoch": 0.8709528214616096, "grad_norm": 3.7967705726623535, "learning_rate": 4.304465160964699e-07, "loss": 0.8722, "step": 11298 }, { "epoch": 0.8710299105766266, "grad_norm": 3.7907609939575195, "learning_rate": 4.299398946951444e-07, "loss": 0.9177, "step": 11299 }, { "epoch": 0.8711069996916435, "grad_norm": 3.48050594329834, "learning_rate": 4.294335582133968e-07, "loss": 0.9796, "step": 11300 }, { "epoch": 0.8711840888066605, "grad_norm": 3.578596830368042, "learning_rate": 4.289275066827947e-07, "loss": 0.9109, "step": 11301 }, { "epoch": 0.8712611779216775, "grad_norm": 3.6778552532196045, "learning_rate": 4.284217401348889e-07, "loss": 0.9468, "step": 11302 }, { "epoch": 0.8713382670366944, "grad_norm": 4.028721809387207, "learning_rate": 4.2791625860121087e-07, "loss": 0.8429, "step": 11303 }, { "epoch": 0.8714153561517114, "grad_norm": 3.7571585178375244, "learning_rate": 4.2741106211327544e-07, "loss": 0.9363, "step": 11304 }, { "epoch": 0.8714924452667283, "grad_norm": 3.5501813888549805, "learning_rate": 4.2690615070257737e-07, "loss": 0.8177, "step": 11305 }, { "epoch": 0.8715695343817453, "grad_norm": 3.985402822494507, "learning_rate": 4.264015244005959e-07, "loss": 0.8521, "step": 11306 }, { "epoch": 0.8716466234967623, "grad_norm": 3.679849624633789, "learning_rate": 4.258971832387926e-07, "loss": 0.9388, "step": 11307 }, { "epoch": 0.8717237126117792, "grad_norm": 3.909078359603882, "learning_rate": 4.253931272486095e-07, "loss": 0.9455, "step": 11308 }, { "epoch": 0.8718008017267962, "grad_norm": 3.8097403049468994, "learning_rate": 4.2488935646147253e-07, "loss": 0.9776, "step": 11309 }, { "epoch": 0.8718778908418131, "grad_norm": 3.7239317893981934, "learning_rate": 4.2438587090879e-07, "loss": 0.8996, "step": 11310 }, { "epoch": 0.8719549799568301, "grad_norm": 3.8013432025909424, "learning_rate": 4.23882670621949e-07, "loss": 0.9272, "step": 11311 }, { "epoch": 0.872032069071847, "grad_norm": 4.090017795562744, "learning_rate": 4.2337975563232437e-07, "loss": 0.8814, "step": 11312 }, { "epoch": 0.872109158186864, "grad_norm": 3.7773220539093018, "learning_rate": 4.2287712597126884e-07, "loss": 0.924, "step": 11313 }, { "epoch": 0.872186247301881, "grad_norm": 3.6810662746429443, "learning_rate": 4.223747816701196e-07, "loss": 0.8808, "step": 11314 }, { "epoch": 0.8722633364168979, "grad_norm": 3.542295455932617, "learning_rate": 4.2187272276019373e-07, "loss": 0.7952, "step": 11315 }, { "epoch": 0.8723404255319149, "grad_norm": 3.5852906703948975, "learning_rate": 4.2137094927279296e-07, "loss": 0.8106, "step": 11316 }, { "epoch": 0.8724175146469318, "grad_norm": 3.841078281402588, 
"learning_rate": 4.208694612392006e-07, "loss": 0.9806, "step": 11317 }, { "epoch": 0.8724946037619488, "grad_norm": 3.797222852706909, "learning_rate": 4.2036825869068097e-07, "loss": 0.9715, "step": 11318 }, { "epoch": 0.8725716928769658, "grad_norm": 3.6867878437042236, "learning_rate": 4.19867341658482e-07, "loss": 0.7848, "step": 11319 }, { "epoch": 0.8726487819919827, "grad_norm": 3.439988613128662, "learning_rate": 4.1936671017383356e-07, "loss": 0.8382, "step": 11320 }, { "epoch": 0.8727258711069997, "grad_norm": 3.4686200618743896, "learning_rate": 4.188663642679469e-07, "loss": 0.7944, "step": 11321 }, { "epoch": 0.8728029602220166, "grad_norm": 3.7739365100860596, "learning_rate": 4.1836630397201593e-07, "loss": 0.8806, "step": 11322 }, { "epoch": 0.8728800493370336, "grad_norm": 4.114104747772217, "learning_rate": 4.1786652931721683e-07, "loss": 0.8682, "step": 11323 }, { "epoch": 0.8729571384520506, "grad_norm": 3.7291269302368164, "learning_rate": 4.17367040334708e-07, "loss": 0.8022, "step": 11324 }, { "epoch": 0.8730342275670675, "grad_norm": 4.308863162994385, "learning_rate": 4.1686783705563115e-07, "loss": 1.0564, "step": 11325 }, { "epoch": 0.8731113166820845, "grad_norm": 3.66945481300354, "learning_rate": 4.163689195111076e-07, "loss": 0.8329, "step": 11326 }, { "epoch": 0.8731884057971014, "grad_norm": 3.6699118614196777, "learning_rate": 4.158702877322418e-07, "loss": 0.8636, "step": 11327 }, { "epoch": 0.8732654949121184, "grad_norm": 3.587113618850708, "learning_rate": 4.153719417501223e-07, "loss": 0.8866, "step": 11328 }, { "epoch": 0.8733425840271354, "grad_norm": 3.8809823989868164, "learning_rate": 4.148738815958181e-07, "loss": 0.8977, "step": 11329 }, { "epoch": 0.8734196731421523, "grad_norm": 3.6669769287109375, "learning_rate": 4.14376107300381e-07, "loss": 0.8677, "step": 11330 }, { "epoch": 0.8734967622571693, "grad_norm": 3.631700038909912, "learning_rate": 4.1387861889484294e-07, "loss": 0.9199, "step": 11331 }, { "epoch": 0.8735738513721862, "grad_norm": 4.055285930633545, "learning_rate": 4.1338141641022125e-07, "loss": 0.9046, "step": 11332 }, { "epoch": 0.8736509404872032, "grad_norm": 3.6422579288482666, "learning_rate": 4.128844998775133e-07, "loss": 1.0013, "step": 11333 }, { "epoch": 0.8737280296022202, "grad_norm": 3.686168909072876, "learning_rate": 4.1238786932769947e-07, "loss": 0.9467, "step": 11334 }, { "epoch": 0.8738051187172371, "grad_norm": 3.7894532680511475, "learning_rate": 4.118915247917421e-07, "loss": 0.884, "step": 11335 }, { "epoch": 0.8738822078322541, "grad_norm": 3.6736958026885986, "learning_rate": 4.113954663005865e-07, "loss": 0.9269, "step": 11336 }, { "epoch": 0.873959296947271, "grad_norm": 3.9054605960845947, "learning_rate": 4.108996938851578e-07, "loss": 0.9955, "step": 11337 }, { "epoch": 0.874036386062288, "grad_norm": 3.832731008529663, "learning_rate": 4.104042075763659e-07, "loss": 0.96, "step": 11338 }, { "epoch": 0.874113475177305, "grad_norm": 3.614189386367798, "learning_rate": 4.0990900740510155e-07, "loss": 0.8904, "step": 11339 }, { "epoch": 0.8741905642923219, "grad_norm": 3.8343393802642822, "learning_rate": 4.094140934022378e-07, "loss": 0.8866, "step": 11340 }, { "epoch": 0.8742676534073389, "grad_norm": 4.005188465118408, "learning_rate": 4.089194655986306e-07, "loss": 1.0047, "step": 11341 }, { "epoch": 0.8743447425223558, "grad_norm": 3.9065210819244385, "learning_rate": 4.084251240251164e-07, "loss": 0.992, "step": 11342 }, { "epoch": 0.8744218316373727, "grad_norm": 3.6721572875976562, 
"learning_rate": 4.079310687125154e-07, "loss": 0.8878, "step": 11343 }, { "epoch": 0.8744989207523898, "grad_norm": 3.8441555500030518, "learning_rate": 4.0743729969162924e-07, "loss": 0.9359, "step": 11344 }, { "epoch": 0.8745760098674067, "grad_norm": 3.992184638977051, "learning_rate": 4.0694381699324157e-07, "loss": 0.9161, "step": 11345 }, { "epoch": 0.8746530989824237, "grad_norm": 3.973717451095581, "learning_rate": 4.0645062064811945e-07, "loss": 0.9411, "step": 11346 }, { "epoch": 0.8747301880974406, "grad_norm": 3.883294105529785, "learning_rate": 4.059577106870111e-07, "loss": 0.9405, "step": 11347 }, { "epoch": 0.8748072772124575, "grad_norm": 3.9654433727264404, "learning_rate": 4.054650871406451e-07, "loss": 0.9231, "step": 11348 }, { "epoch": 0.8748843663274746, "grad_norm": 3.9687893390655518, "learning_rate": 4.0497275003973604e-07, "loss": 0.9068, "step": 11349 }, { "epoch": 0.8749614554424915, "grad_norm": 3.720082998275757, "learning_rate": 4.044806994149769e-07, "loss": 0.9742, "step": 11350 }, { "epoch": 0.8750385445575085, "grad_norm": 3.890352964401245, "learning_rate": 4.039889352970461e-07, "loss": 0.9705, "step": 11351 }, { "epoch": 0.8751156336725254, "grad_norm": 4.000323295593262, "learning_rate": 4.0349745771660233e-07, "loss": 0.9054, "step": 11352 }, { "epoch": 0.8751927227875425, "grad_norm": 4.110193729400635, "learning_rate": 4.03006266704285e-07, "loss": 0.8724, "step": 11353 }, { "epoch": 0.8752698119025594, "grad_norm": 3.643488645553589, "learning_rate": 4.0251536229071906e-07, "loss": 0.8721, "step": 11354 }, { "epoch": 0.8753469010175763, "grad_norm": 3.35498309135437, "learning_rate": 4.0202474450650786e-07, "loss": 0.8364, "step": 11355 }, { "epoch": 0.8754239901325933, "grad_norm": 3.5333361625671387, "learning_rate": 4.0153441338224187e-07, "loss": 0.8993, "step": 11356 }, { "epoch": 0.8755010792476102, "grad_norm": 3.7463607788085938, "learning_rate": 4.010443689484894e-07, "loss": 0.8257, "step": 11357 }, { "epoch": 0.8755781683626273, "grad_norm": 3.9253594875335693, "learning_rate": 4.0055461123580166e-07, "loss": 0.921, "step": 11358 }, { "epoch": 0.8756552574776442, "grad_norm": 4.455300331115723, "learning_rate": 4.0006514027471243e-07, "loss": 0.9877, "step": 11359 }, { "epoch": 0.8757323465926611, "grad_norm": 3.6334636211395264, "learning_rate": 3.9957595609573794e-07, "loss": 0.824, "step": 11360 }, { "epoch": 0.8758094357076781, "grad_norm": 4.01237154006958, "learning_rate": 3.99087058729376e-07, "loss": 0.8933, "step": 11361 }, { "epoch": 0.875886524822695, "grad_norm": 3.8893368244171143, "learning_rate": 3.985984482061089e-07, "loss": 0.9028, "step": 11362 }, { "epoch": 0.875963613937712, "grad_norm": 3.884305238723755, "learning_rate": 3.981101245563962e-07, "loss": 0.9565, "step": 11363 }, { "epoch": 0.876040703052729, "grad_norm": 4.136764049530029, "learning_rate": 3.976220878106829e-07, "loss": 0.958, "step": 11364 }, { "epoch": 0.8761177921677459, "grad_norm": 4.275717258453369, "learning_rate": 3.971343379993964e-07, "loss": 0.9941, "step": 11365 }, { "epoch": 0.8761948812827629, "grad_norm": 3.4475440979003906, "learning_rate": 3.9664687515294566e-07, "loss": 0.8929, "step": 11366 }, { "epoch": 0.8762719703977798, "grad_norm": 3.3091351985931396, "learning_rate": 3.9615969930172027e-07, "loss": 0.7643, "step": 11367 }, { "epoch": 0.8763490595127968, "grad_norm": 3.8550055027008057, "learning_rate": 3.9567281047609427e-07, "loss": 0.8662, "step": 11368 }, { "epoch": 0.8764261486278138, "grad_norm": 3.535909414291382, 
"learning_rate": 3.9518620870642176e-07, "loss": 0.9508, "step": 11369 }, { "epoch": 0.8765032377428307, "grad_norm": 4.034812927246094, "learning_rate": 3.946998940230401e-07, "loss": 0.8804, "step": 11370 }, { "epoch": 0.8765803268578477, "grad_norm": 3.9649436473846436, "learning_rate": 3.9421386645626894e-07, "loss": 1.0498, "step": 11371 }, { "epoch": 0.8766574159728646, "grad_norm": 3.772454261779785, "learning_rate": 3.9372812603640844e-07, "loss": 0.9142, "step": 11372 }, { "epoch": 0.8767345050878816, "grad_norm": 3.668459415435791, "learning_rate": 3.932426727937444e-07, "loss": 0.8981, "step": 11373 }, { "epoch": 0.8768115942028986, "grad_norm": 4.301527500152588, "learning_rate": 3.927575067585393e-07, "loss": 0.7799, "step": 11374 }, { "epoch": 0.8768886833179155, "grad_norm": 3.583970546722412, "learning_rate": 3.9227262796104226e-07, "loss": 0.9783, "step": 11375 }, { "epoch": 0.8769657724329325, "grad_norm": 4.123873710632324, "learning_rate": 3.91788036431483e-07, "loss": 0.9984, "step": 11376 }, { "epoch": 0.8770428615479494, "grad_norm": 3.809326648712158, "learning_rate": 3.9130373220007347e-07, "loss": 0.9792, "step": 11377 }, { "epoch": 0.8771199506629664, "grad_norm": 3.716824769973755, "learning_rate": 3.9081971529700726e-07, "loss": 0.8683, "step": 11378 }, { "epoch": 0.8771970397779834, "grad_norm": 3.9373066425323486, "learning_rate": 3.903359857524597e-07, "loss": 0.8693, "step": 11379 }, { "epoch": 0.8772741288930003, "grad_norm": 3.775418996810913, "learning_rate": 3.898525435965894e-07, "loss": 0.8815, "step": 11380 }, { "epoch": 0.8773512180080173, "grad_norm": 3.630040168762207, "learning_rate": 3.893693888595368e-07, "loss": 0.9739, "step": 11381 }, { "epoch": 0.8774283071230342, "grad_norm": 3.7354750633239746, "learning_rate": 3.8888652157142327e-07, "loss": 0.8161, "step": 11382 }, { "epoch": 0.8775053962380512, "grad_norm": 3.6053590774536133, "learning_rate": 3.8840394176235365e-07, "loss": 0.9304, "step": 11383 }, { "epoch": 0.8775824853530682, "grad_norm": 3.883542537689209, "learning_rate": 3.87921649462415e-07, "loss": 0.8643, "step": 11384 }, { "epoch": 0.8776595744680851, "grad_norm": 3.824704647064209, "learning_rate": 3.8743964470167427e-07, "loss": 0.8747, "step": 11385 }, { "epoch": 0.8777366635831021, "grad_norm": 3.9502909183502197, "learning_rate": 3.8695792751018257e-07, "loss": 0.9046, "step": 11386 }, { "epoch": 0.877813752698119, "grad_norm": 3.645355224609375, "learning_rate": 3.864764979179725e-07, "loss": 0.89, "step": 11387 }, { "epoch": 0.877890841813136, "grad_norm": 3.920626163482666, "learning_rate": 3.859953559550589e-07, "loss": 1.0779, "step": 11388 }, { "epoch": 0.877967930928153, "grad_norm": 3.494645595550537, "learning_rate": 3.855145016514389e-07, "loss": 0.9319, "step": 11389 }, { "epoch": 0.8780450200431699, "grad_norm": 3.413755178451538, "learning_rate": 3.8503393503708965e-07, "loss": 0.9129, "step": 11390 }, { "epoch": 0.8781221091581869, "grad_norm": 3.9198455810546875, "learning_rate": 3.8455365614197327e-07, "loss": 1.0013, "step": 11391 }, { "epoch": 0.8781991982732038, "grad_norm": 4.102309703826904, "learning_rate": 3.840736649960325e-07, "loss": 0.9704, "step": 11392 }, { "epoch": 0.8782762873882208, "grad_norm": 3.7239162921905518, "learning_rate": 3.8359396162919225e-07, "loss": 0.8796, "step": 11393 }, { "epoch": 0.8783533765032377, "grad_norm": 3.941939115524292, "learning_rate": 3.831145460713592e-07, "loss": 0.9241, "step": 11394 }, { "epoch": 0.8784304656182547, "grad_norm": 3.810060977935791, 
"learning_rate": 3.826354183524239e-07, "loss": 0.8778, "step": 11395 }, { "epoch": 0.8785075547332717, "grad_norm": 3.513016700744629, "learning_rate": 3.821565785022552e-07, "loss": 0.9103, "step": 11396 }, { "epoch": 0.8785846438482886, "grad_norm": 3.6620430946350098, "learning_rate": 3.816780265507075e-07, "loss": 0.9333, "step": 11397 }, { "epoch": 0.8786617329633056, "grad_norm": 3.8001415729522705, "learning_rate": 3.811997625276165e-07, "loss": 0.8179, "step": 11398 }, { "epoch": 0.8787388220783225, "grad_norm": 3.8821539878845215, "learning_rate": 3.807217864627982e-07, "loss": 0.9516, "step": 11399 }, { "epoch": 0.8788159111933395, "grad_norm": 3.6851859092712402, "learning_rate": 3.802440983860528e-07, "loss": 0.9707, "step": 11400 }, { "epoch": 0.8788930003083565, "grad_norm": 4.240994453430176, "learning_rate": 3.7976669832716193e-07, "loss": 0.9472, "step": 11401 }, { "epoch": 0.8789700894233734, "grad_norm": 3.83020281791687, "learning_rate": 3.7928958631588795e-07, "loss": 0.9476, "step": 11402 }, { "epoch": 0.8790471785383904, "grad_norm": 3.228642702102661, "learning_rate": 3.788127623819776e-07, "loss": 0.7694, "step": 11403 }, { "epoch": 0.8791242676534073, "grad_norm": 3.589622735977173, "learning_rate": 3.783362265551577e-07, "loss": 0.9188, "step": 11404 }, { "epoch": 0.8792013567684243, "grad_norm": 3.4474847316741943, "learning_rate": 3.7785997886513827e-07, "loss": 0.8424, "step": 11405 }, { "epoch": 0.8792784458834413, "grad_norm": 3.7509865760803223, "learning_rate": 3.7738401934161006e-07, "loss": 0.883, "step": 11406 }, { "epoch": 0.8793555349984582, "grad_norm": 3.862302541732788, "learning_rate": 3.7690834801424714e-07, "loss": 0.8483, "step": 11407 }, { "epoch": 0.8794326241134752, "grad_norm": 3.5118508338928223, "learning_rate": 3.764329649127046e-07, "loss": 0.8729, "step": 11408 }, { "epoch": 0.8795097132284921, "grad_norm": 3.7637479305267334, "learning_rate": 3.759578700666211e-07, "loss": 0.9572, "step": 11409 }, { "epoch": 0.879586802343509, "grad_norm": 3.7532765865325928, "learning_rate": 3.754830635056167e-07, "loss": 0.9194, "step": 11410 }, { "epoch": 0.8796638914585261, "grad_norm": 3.501901865005493, "learning_rate": 3.750085452592911e-07, "loss": 0.7931, "step": 11411 }, { "epoch": 0.879740980573543, "grad_norm": 3.859379529953003, "learning_rate": 3.745343153572295e-07, "loss": 0.8634, "step": 11412 }, { "epoch": 0.87981806968856, "grad_norm": 3.430656671524048, "learning_rate": 3.7406037382899716e-07, "loss": 0.9078, "step": 11413 }, { "epoch": 0.8798951588035769, "grad_norm": 3.642101526260376, "learning_rate": 3.735867207041427e-07, "loss": 0.9528, "step": 11414 }, { "epoch": 0.8799722479185939, "grad_norm": 3.803781270980835, "learning_rate": 3.7311335601219524e-07, "loss": 0.8334, "step": 11415 }, { "epoch": 0.8800493370336109, "grad_norm": 3.886075496673584, "learning_rate": 3.7264027978266727e-07, "loss": 0.945, "step": 11416 }, { "epoch": 0.8801264261486278, "grad_norm": 3.6445107460021973, "learning_rate": 3.7216749204505187e-07, "loss": 0.8265, "step": 11417 }, { "epoch": 0.8802035152636448, "grad_norm": 4.004867076873779, "learning_rate": 3.7169499282882435e-07, "loss": 0.8832, "step": 11418 }, { "epoch": 0.8802806043786617, "grad_norm": 3.6422877311706543, "learning_rate": 3.71222782163444e-07, "loss": 0.982, "step": 11419 }, { "epoch": 0.8803576934936787, "grad_norm": 3.556857109069824, "learning_rate": 3.707508600783499e-07, "loss": 0.8123, "step": 11420 }, { "epoch": 0.8804347826086957, "grad_norm": 3.529580593109131, 
"learning_rate": 3.7027922660296533e-07, "loss": 0.8967, "step": 11421 }, { "epoch": 0.8805118717237126, "grad_norm": 3.779604911804199, "learning_rate": 3.6980788176669223e-07, "loss": 0.8938, "step": 11422 }, { "epoch": 0.8805889608387296, "grad_norm": 3.4977400302886963, "learning_rate": 3.6933682559891717e-07, "loss": 0.9388, "step": 11423 }, { "epoch": 0.8806660499537465, "grad_norm": 3.975605010986328, "learning_rate": 3.6886605812900766e-07, "loss": 0.9501, "step": 11424 }, { "epoch": 0.8807431390687634, "grad_norm": 4.2428202629089355, "learning_rate": 3.683955793863148e-07, "loss": 0.9384, "step": 11425 }, { "epoch": 0.8808202281837805, "grad_norm": 3.509303092956543, "learning_rate": 3.6792538940017054e-07, "loss": 0.9074, "step": 11426 }, { "epoch": 0.8808973172987974, "grad_norm": 3.5925185680389404, "learning_rate": 3.6745548819988763e-07, "loss": 0.7849, "step": 11427 }, { "epoch": 0.8809744064138144, "grad_norm": 3.5892629623413086, "learning_rate": 3.669858758147621e-07, "loss": 0.8397, "step": 11428 }, { "epoch": 0.8810514955288313, "grad_norm": 3.527111291885376, "learning_rate": 3.6651655227407214e-07, "loss": 0.7728, "step": 11429 }, { "epoch": 0.8811285846438482, "grad_norm": 3.720457077026367, "learning_rate": 3.660475176070777e-07, "loss": 0.8121, "step": 11430 }, { "epoch": 0.8812056737588653, "grad_norm": 3.873420000076294, "learning_rate": 3.6557877184302093e-07, "loss": 0.9066, "step": 11431 }, { "epoch": 0.8812827628738822, "grad_norm": 3.740910530090332, "learning_rate": 3.6511031501112625e-07, "loss": 0.8271, "step": 11432 }, { "epoch": 0.8813598519888992, "grad_norm": 3.7194406986236572, "learning_rate": 3.6464214714059754e-07, "loss": 0.9262, "step": 11433 }, { "epoch": 0.8814369411039161, "grad_norm": 3.7724802494049072, "learning_rate": 3.641742682606242e-07, "loss": 0.7748, "step": 11434 }, { "epoch": 0.881514030218933, "grad_norm": 4.035959243774414, "learning_rate": 3.637066784003757e-07, "loss": 0.9279, "step": 11435 }, { "epoch": 0.8815911193339501, "grad_norm": 3.918487310409546, "learning_rate": 3.632393775890036e-07, "loss": 0.9514, "step": 11436 }, { "epoch": 0.881668208448967, "grad_norm": 3.338373899459839, "learning_rate": 3.627723658556426e-07, "loss": 0.7877, "step": 11437 }, { "epoch": 0.881745297563984, "grad_norm": 3.8025522232055664, "learning_rate": 3.6230564322940754e-07, "loss": 0.9449, "step": 11438 }, { "epoch": 0.8818223866790009, "grad_norm": 3.8101162910461426, "learning_rate": 3.618392097393958e-07, "loss": 0.9916, "step": 11439 }, { "epoch": 0.8818994757940178, "grad_norm": 3.876401662826538, "learning_rate": 3.6137306541468797e-07, "loss": 0.8952, "step": 11440 }, { "epoch": 0.8819765649090349, "grad_norm": 3.733764171600342, "learning_rate": 3.6090721028434527e-07, "loss": 0.8225, "step": 11441 }, { "epoch": 0.8820536540240518, "grad_norm": 3.6853973865509033, "learning_rate": 3.604416443774117e-07, "loss": 0.8797, "step": 11442 }, { "epoch": 0.8821307431390688, "grad_norm": 3.8799946308135986, "learning_rate": 3.599763677229134e-07, "loss": 0.9028, "step": 11443 }, { "epoch": 0.8822078322540857, "grad_norm": 3.8343660831451416, "learning_rate": 3.595113803498557e-07, "loss": 0.9354, "step": 11444 }, { "epoch": 0.8822849213691026, "grad_norm": 3.665975332260132, "learning_rate": 3.5904668228723074e-07, "loss": 0.9204, "step": 11445 }, { "epoch": 0.8823620104841197, "grad_norm": 3.6539506912231445, "learning_rate": 3.5858227356400876e-07, "loss": 0.8875, "step": 11446 }, { "epoch": 0.8824390995991366, "grad_norm": 
3.846554756164551, "learning_rate": 3.581181542091444e-07, "loss": 0.875, "step": 11447 }, { "epoch": 0.8825161887141536, "grad_norm": 3.7280449867248535, "learning_rate": 3.576543242515712e-07, "loss": 0.9288, "step": 11448 }, { "epoch": 0.8825932778291705, "grad_norm": 3.6032848358154297, "learning_rate": 3.571907837202077e-07, "loss": 0.8565, "step": 11449 }, { "epoch": 0.8826703669441874, "grad_norm": 3.6691172122955322, "learning_rate": 3.5672753264395345e-07, "loss": 0.8965, "step": 11450 }, { "epoch": 0.8827474560592045, "grad_norm": 3.7892770767211914, "learning_rate": 3.5626457105168876e-07, "loss": 0.8547, "step": 11451 }, { "epoch": 0.8828245451742214, "grad_norm": 4.1033477783203125, "learning_rate": 3.558018989722778e-07, "loss": 0.9716, "step": 11452 }, { "epoch": 0.8829016342892384, "grad_norm": 3.6659538745880127, "learning_rate": 3.5533951643456686e-07, "loss": 0.8052, "step": 11453 }, { "epoch": 0.8829787234042553, "grad_norm": 4.2055864334106445, "learning_rate": 3.548774234673802e-07, "loss": 0.8538, "step": 11454 }, { "epoch": 0.8830558125192722, "grad_norm": 3.6786320209503174, "learning_rate": 3.5441562009952856e-07, "loss": 0.9322, "step": 11455 }, { "epoch": 0.8831329016342893, "grad_norm": 3.6481385231018066, "learning_rate": 3.5395410635980343e-07, "loss": 0.8989, "step": 11456 }, { "epoch": 0.8832099907493062, "grad_norm": 3.7394192218780518, "learning_rate": 3.5349288227697675e-07, "loss": 0.9274, "step": 11457 }, { "epoch": 0.8832870798643232, "grad_norm": 3.672476053237915, "learning_rate": 3.5303194787980497e-07, "loss": 0.9608, "step": 11458 }, { "epoch": 0.8833641689793401, "grad_norm": 3.525527238845825, "learning_rate": 3.5257130319702347e-07, "loss": 0.8911, "step": 11459 }, { "epoch": 0.883441258094357, "grad_norm": 3.9847187995910645, "learning_rate": 3.5211094825735147e-07, "loss": 0.895, "step": 11460 }, { "epoch": 0.883518347209374, "grad_norm": 4.699793338775635, "learning_rate": 3.516508830894894e-07, "loss": 0.956, "step": 11461 }, { "epoch": 0.883595436324391, "grad_norm": 3.901806354522705, "learning_rate": 3.5119110772212084e-07, "loss": 0.9902, "step": 11462 }, { "epoch": 0.883672525439408, "grad_norm": 4.073935031890869, "learning_rate": 3.5073162218390967e-07, "loss": 0.8234, "step": 11463 }, { "epoch": 0.8837496145544249, "grad_norm": 3.7054238319396973, "learning_rate": 3.502724265035035e-07, "loss": 0.8809, "step": 11464 }, { "epoch": 0.8838267036694418, "grad_norm": 3.653473377227783, "learning_rate": 3.498135207095288e-07, "loss": 0.8325, "step": 11465 }, { "epoch": 0.8839037927844589, "grad_norm": 3.7188761234283447, "learning_rate": 3.4935490483059775e-07, "loss": 0.8455, "step": 11466 }, { "epoch": 0.8839808818994758, "grad_norm": 4.025871276855469, "learning_rate": 3.4889657889530195e-07, "loss": 0.9839, "step": 11467 }, { "epoch": 0.8840579710144928, "grad_norm": 3.9531667232513428, "learning_rate": 3.4843854293221515e-07, "loss": 0.9269, "step": 11468 }, { "epoch": 0.8841350601295097, "grad_norm": 3.4958362579345703, "learning_rate": 3.479807969698956e-07, "loss": 0.8985, "step": 11469 }, { "epoch": 0.8842121492445266, "grad_norm": 3.8494677543640137, "learning_rate": 3.475233410368789e-07, "loss": 0.9038, "step": 11470 }, { "epoch": 0.8842892383595437, "grad_norm": 3.6966187953948975, "learning_rate": 3.470661751616866e-07, "loss": 0.816, "step": 11471 }, { "epoch": 0.8843663274745606, "grad_norm": 3.576230049133301, "learning_rate": 3.466092993728193e-07, "loss": 0.8531, "step": 11472 }, { "epoch": 0.8844434165895776, 
"grad_norm": 3.6708030700683594, "learning_rate": 3.461527136987619e-07, "loss": 0.8799, "step": 11473 }, { "epoch": 0.8845205057045945, "grad_norm": 4.025277614593506, "learning_rate": 3.4569641816798115e-07, "loss": 0.9407, "step": 11474 }, { "epoch": 0.8845975948196114, "grad_norm": 3.651562452316284, "learning_rate": 3.452404128089221e-07, "loss": 0.8917, "step": 11475 }, { "epoch": 0.8846746839346284, "grad_norm": 3.617121934890747, "learning_rate": 3.447846976500163e-07, "loss": 0.9336, "step": 11476 }, { "epoch": 0.8847517730496454, "grad_norm": 3.4860620498657227, "learning_rate": 3.44329272719674e-07, "loss": 0.8401, "step": 11477 }, { "epoch": 0.8848288621646624, "grad_norm": 4.102723121643066, "learning_rate": 3.4387413804628955e-07, "loss": 1.0335, "step": 11478 }, { "epoch": 0.8849059512796793, "grad_norm": 3.53051495552063, "learning_rate": 3.434192936582381e-07, "loss": 0.8077, "step": 11479 }, { "epoch": 0.8849830403946962, "grad_norm": 3.604583978652954, "learning_rate": 3.42964739583877e-07, "loss": 0.977, "step": 11480 }, { "epoch": 0.8850601295097132, "grad_norm": 3.8658695220947266, "learning_rate": 3.425104758515441e-07, "loss": 0.887, "step": 11481 }, { "epoch": 0.8851372186247302, "grad_norm": 3.5702638626098633, "learning_rate": 3.420565024895617e-07, "loss": 0.9748, "step": 11482 }, { "epoch": 0.8852143077397472, "grad_norm": 3.790424108505249, "learning_rate": 3.416028195262322e-07, "loss": 0.9366, "step": 11483 }, { "epoch": 0.8852913968547641, "grad_norm": 4.021109580993652, "learning_rate": 3.411494269898402e-07, "loss": 1.0176, "step": 11484 }, { "epoch": 0.885368485969781, "grad_norm": 4.051353454589844, "learning_rate": 3.406963249086537e-07, "loss": 0.8541, "step": 11485 }, { "epoch": 0.885445575084798, "grad_norm": 3.7868709564208984, "learning_rate": 3.4024351331091945e-07, "loss": 0.8638, "step": 11486 }, { "epoch": 0.885522664199815, "grad_norm": 3.931445598602295, "learning_rate": 3.3979099222486824e-07, "loss": 0.9284, "step": 11487 }, { "epoch": 0.885599753314832, "grad_norm": 3.800612449645996, "learning_rate": 3.3933876167871196e-07, "loss": 0.8934, "step": 11488 }, { "epoch": 0.8856768424298489, "grad_norm": 3.67228627204895, "learning_rate": 3.388868217006469e-07, "loss": 0.8676, "step": 11489 }, { "epoch": 0.8857539315448658, "grad_norm": 4.013760089874268, "learning_rate": 3.3843517231884894e-07, "loss": 0.8973, "step": 11490 }, { "epoch": 0.8858310206598828, "grad_norm": 3.6695284843444824, "learning_rate": 3.379838135614738e-07, "loss": 0.781, "step": 11491 }, { "epoch": 0.8859081097748998, "grad_norm": 3.716569185256958, "learning_rate": 3.375327454566629e-07, "loss": 0.9104, "step": 11492 }, { "epoch": 0.8859851988899168, "grad_norm": 3.993039608001709, "learning_rate": 3.370819680325377e-07, "loss": 0.8933, "step": 11493 }, { "epoch": 0.8860622880049337, "grad_norm": 3.30863356590271, "learning_rate": 3.3663148131720223e-07, "loss": 0.799, "step": 11494 }, { "epoch": 0.8861393771199506, "grad_norm": 3.9631307125091553, "learning_rate": 3.3618128533874196e-07, "loss": 0.8422, "step": 11495 }, { "epoch": 0.8862164662349676, "grad_norm": 3.697521209716797, "learning_rate": 3.357313801252238e-07, "loss": 0.8291, "step": 11496 }, { "epoch": 0.8862935553499846, "grad_norm": 4.11827278137207, "learning_rate": 3.3528176570469697e-07, "loss": 0.9668, "step": 11497 }, { "epoch": 0.8863706444650016, "grad_norm": 4.193023204803467, "learning_rate": 3.348324421051929e-07, "loss": 0.896, "step": 11498 }, { "epoch": 0.8864477335800185, "grad_norm": 
3.8270843029022217, "learning_rate": 3.3438340935472436e-07, "loss": 0.9042, "step": 11499 }, { "epoch": 0.8865248226950354, "grad_norm": 3.8805384635925293, "learning_rate": 3.3393466748128657e-07, "loss": 0.9534, "step": 11500 }, { "epoch": 0.8866019118100524, "grad_norm": 3.6519830226898193, "learning_rate": 3.3348621651285663e-07, "loss": 0.8594, "step": 11501 }, { "epoch": 0.8866790009250693, "grad_norm": 3.8050684928894043, "learning_rate": 3.330380564773922e-07, "loss": 0.8864, "step": 11502 }, { "epoch": 0.8867560900400864, "grad_norm": 4.044971942901611, "learning_rate": 3.3259018740283366e-07, "loss": 0.9039, "step": 11503 }, { "epoch": 0.8868331791551033, "grad_norm": 3.61731219291687, "learning_rate": 3.321426093171043e-07, "loss": 0.8472, "step": 11504 }, { "epoch": 0.8869102682701202, "grad_norm": 3.757559061050415, "learning_rate": 3.316953222481073e-07, "loss": 0.9608, "step": 11505 }, { "epoch": 0.8869873573851372, "grad_norm": 4.073364734649658, "learning_rate": 3.312483262237304e-07, "loss": 0.97, "step": 11506 }, { "epoch": 0.8870644465001541, "grad_norm": 3.8168318271636963, "learning_rate": 3.30801621271839e-07, "loss": 1.0477, "step": 11507 }, { "epoch": 0.8871415356151712, "grad_norm": 3.800417184829712, "learning_rate": 3.303552074202848e-07, "loss": 0.9133, "step": 11508 }, { "epoch": 0.8872186247301881, "grad_norm": 3.7072558403015137, "learning_rate": 3.299090846968983e-07, "loss": 0.8998, "step": 11509 }, { "epoch": 0.887295713845205, "grad_norm": 3.8859128952026367, "learning_rate": 3.294632531294933e-07, "loss": 0.8885, "step": 11510 }, { "epoch": 0.887372802960222, "grad_norm": 3.608797311782837, "learning_rate": 3.290177127458655e-07, "loss": 0.9306, "step": 11511 }, { "epoch": 0.8874498920752389, "grad_norm": 3.846325635910034, "learning_rate": 3.285724635737919e-07, "loss": 0.8752, "step": 11512 }, { "epoch": 0.887526981190256, "grad_norm": 3.467984199523926, "learning_rate": 3.281275056410305e-07, "loss": 0.7637, "step": 11513 }, { "epoch": 0.8876040703052729, "grad_norm": 3.4977262020111084, "learning_rate": 3.276828389753234e-07, "loss": 0.8379, "step": 11514 }, { "epoch": 0.8876811594202898, "grad_norm": 3.824713945388794, "learning_rate": 3.2723846360439236e-07, "loss": 0.8603, "step": 11515 }, { "epoch": 0.8877582485353068, "grad_norm": 3.6566162109375, "learning_rate": 3.267943795559425e-07, "loss": 0.9688, "step": 11516 }, { "epoch": 0.8878353376503237, "grad_norm": 3.705967664718628, "learning_rate": 3.263505868576605e-07, "loss": 0.9366, "step": 11517 }, { "epoch": 0.8879124267653408, "grad_norm": 3.7640185356140137, "learning_rate": 3.259070855372132e-07, "loss": 0.8421, "step": 11518 }, { "epoch": 0.8879895158803577, "grad_norm": 3.6393392086029053, "learning_rate": 3.2546387562225166e-07, "loss": 0.9986, "step": 11519 }, { "epoch": 0.8880666049953746, "grad_norm": 3.8339908123016357, "learning_rate": 3.2502095714040673e-07, "loss": 0.8996, "step": 11520 }, { "epoch": 0.8881436941103916, "grad_norm": 3.783637285232544, "learning_rate": 3.245783301192934e-07, "loss": 0.9308, "step": 11521 }, { "epoch": 0.8882207832254085, "grad_norm": 3.7414445877075195, "learning_rate": 3.2413599458650744e-07, "loss": 0.8207, "step": 11522 }, { "epoch": 0.8882978723404256, "grad_norm": 3.6002695560455322, "learning_rate": 3.2369395056962404e-07, "loss": 0.8007, "step": 11523 }, { "epoch": 0.8883749614554425, "grad_norm": 3.9991655349731445, "learning_rate": 3.232521980962039e-07, "loss": 0.8721, "step": 11524 }, { "epoch": 0.8884520505704595, "grad_norm": 
3.785975933074951, "learning_rate": 3.2281073719378773e-07, "loss": 0.9526, "step": 11525 }, { "epoch": 0.8885291396854764, "grad_norm": 3.625488758087158, "learning_rate": 3.22369567889898e-07, "loss": 0.8719, "step": 11526 }, { "epoch": 0.8886062288004933, "grad_norm": 3.615448236465454, "learning_rate": 3.2192869021203997e-07, "loss": 0.8264, "step": 11527 }, { "epoch": 0.8886833179155104, "grad_norm": 3.5204501152038574, "learning_rate": 3.214881041877005e-07, "loss": 0.8942, "step": 11528 }, { "epoch": 0.8887604070305273, "grad_norm": 3.519294261932373, "learning_rate": 3.210478098443459e-07, "loss": 0.7734, "step": 11529 }, { "epoch": 0.8888374961455443, "grad_norm": 3.4945321083068848, "learning_rate": 3.206078072094276e-07, "loss": 0.8511, "step": 11530 }, { "epoch": 0.8889145852605612, "grad_norm": 3.7900755405426025, "learning_rate": 3.201680963103776e-07, "loss": 0.8575, "step": 11531 }, { "epoch": 0.8889916743755781, "grad_norm": 3.660367012023926, "learning_rate": 3.197286771746094e-07, "loss": 0.91, "step": 11532 }, { "epoch": 0.8890687634905952, "grad_norm": 3.8760204315185547, "learning_rate": 3.192895498295179e-07, "loss": 0.8206, "step": 11533 }, { "epoch": 0.8891458526056121, "grad_norm": 3.871697425842285, "learning_rate": 3.188507143024816e-07, "loss": 0.9093, "step": 11534 }, { "epoch": 0.8892229417206291, "grad_norm": 3.5889370441436768, "learning_rate": 3.184121706208587e-07, "loss": 0.8424, "step": 11535 }, { "epoch": 0.889300030835646, "grad_norm": 3.674396514892578, "learning_rate": 3.1797391881199014e-07, "loss": 0.9136, "step": 11536 }, { "epoch": 0.8893771199506629, "grad_norm": 3.540079116821289, "learning_rate": 3.17535958903199e-07, "loss": 0.8915, "step": 11537 }, { "epoch": 0.88945420906568, "grad_norm": 3.643944501876831, "learning_rate": 3.170982909217907e-07, "loss": 0.9742, "step": 11538 }, { "epoch": 0.8895312981806969, "grad_norm": 3.8346221446990967, "learning_rate": 3.1666091489505004e-07, "loss": 0.9745, "step": 11539 }, { "epoch": 0.8896083872957139, "grad_norm": 3.8320624828338623, "learning_rate": 3.162238308502452e-07, "loss": 0.8823, "step": 11540 }, { "epoch": 0.8896854764107308, "grad_norm": 3.508303642272949, "learning_rate": 3.157870388146267e-07, "loss": 0.8418, "step": 11541 }, { "epoch": 0.8897625655257477, "grad_norm": 3.801394462585449, "learning_rate": 3.1535053881542657e-07, "loss": 0.9079, "step": 11542 }, { "epoch": 0.8898396546407648, "grad_norm": 3.286583662033081, "learning_rate": 3.14914330879858e-07, "loss": 0.7761, "step": 11543 }, { "epoch": 0.8899167437557817, "grad_norm": 4.076812267303467, "learning_rate": 3.14478415035116e-07, "loss": 1.0037, "step": 11544 }, { "epoch": 0.8899938328707987, "grad_norm": 3.5942256450653076, "learning_rate": 3.140427913083777e-07, "loss": 0.8007, "step": 11545 }, { "epoch": 0.8900709219858156, "grad_norm": 3.80216121673584, "learning_rate": 3.136074597268024e-07, "loss": 0.9157, "step": 11546 }, { "epoch": 0.8901480111008325, "grad_norm": 4.44173002243042, "learning_rate": 3.1317242031753013e-07, "loss": 1.0875, "step": 11547 }, { "epoch": 0.8902251002158496, "grad_norm": 3.757503032684326, "learning_rate": 3.127376731076837e-07, "loss": 0.9564, "step": 11548 }, { "epoch": 0.8903021893308665, "grad_norm": 3.6914825439453125, "learning_rate": 3.1230321812436846e-07, "loss": 0.8826, "step": 11549 }, { "epoch": 0.8903792784458835, "grad_norm": 3.583136796951294, "learning_rate": 3.118690553946685e-07, "loss": 0.8588, "step": 11550 }, { "epoch": 0.8904563675609004, "grad_norm": 
3.7965340614318848, "learning_rate": 3.114351849456526e-07, "loss": 0.9515, "step": 11551 }, { "epoch": 0.8905334566759173, "grad_norm": 3.5539650917053223, "learning_rate": 3.110016068043703e-07, "loss": 0.9266, "step": 11552 }, { "epoch": 0.8906105457909343, "grad_norm": 3.8235104084014893, "learning_rate": 3.105683209978527e-07, "loss": 0.9676, "step": 11553 }, { "epoch": 0.8906876349059513, "grad_norm": 3.7492964267730713, "learning_rate": 3.101353275531138e-07, "loss": 0.8673, "step": 11554 }, { "epoch": 0.8907647240209683, "grad_norm": 3.6325714588165283, "learning_rate": 3.09702626497147e-07, "loss": 0.8821, "step": 11555 }, { "epoch": 0.8908418131359852, "grad_norm": 3.75046443939209, "learning_rate": 3.092702178569301e-07, "loss": 0.9256, "step": 11556 }, { "epoch": 0.8909189022510021, "grad_norm": 3.687927007675171, "learning_rate": 3.088381016594211e-07, "loss": 0.8995, "step": 11557 }, { "epoch": 0.8909959913660191, "grad_norm": 3.565046787261963, "learning_rate": 3.0840627793156e-07, "loss": 0.8913, "step": 11558 }, { "epoch": 0.8910730804810361, "grad_norm": 3.7868244647979736, "learning_rate": 3.0797474670026917e-07, "loss": 0.9228, "step": 11559 }, { "epoch": 0.8911501695960531, "grad_norm": 3.6656594276428223, "learning_rate": 3.0754350799245323e-07, "loss": 0.9629, "step": 11560 }, { "epoch": 0.89122725871107, "grad_norm": 3.7980310916900635, "learning_rate": 3.0711256183499574e-07, "loss": 0.9249, "step": 11561 }, { "epoch": 0.8913043478260869, "grad_norm": 3.5572173595428467, "learning_rate": 3.066819082547651e-07, "loss": 0.7583, "step": 11562 }, { "epoch": 0.8913814369411039, "grad_norm": 3.639159679412842, "learning_rate": 3.062515472786098e-07, "loss": 1.0125, "step": 11563 }, { "epoch": 0.8914585260561209, "grad_norm": 3.9139461517333984, "learning_rate": 3.0582147893336136e-07, "loss": 0.994, "step": 11564 }, { "epoch": 0.8915356151711379, "grad_norm": 3.53010630607605, "learning_rate": 3.0539170324583257e-07, "loss": 0.8378, "step": 11565 }, { "epoch": 0.8916127042861548, "grad_norm": 3.5645971298217773, "learning_rate": 3.049622202428165e-07, "loss": 0.8547, "step": 11566 }, { "epoch": 0.8916897934011717, "grad_norm": 3.8837499618530273, "learning_rate": 3.045330299510896e-07, "loss": 0.9795, "step": 11567 }, { "epoch": 0.8917668825161887, "grad_norm": 4.236751079559326, "learning_rate": 3.041041323974098e-07, "loss": 0.9627, "step": 11568 }, { "epoch": 0.8918439716312057, "grad_norm": 4.3294477462768555, "learning_rate": 3.0367552760851684e-07, "loss": 0.8706, "step": 11569 }, { "epoch": 0.8919210607462227, "grad_norm": 3.358781337738037, "learning_rate": 3.0324721561113213e-07, "loss": 0.8451, "step": 11570 }, { "epoch": 0.8919981498612396, "grad_norm": 3.78355073928833, "learning_rate": 3.028191964319582e-07, "loss": 0.771, "step": 11571 }, { "epoch": 0.8920752389762565, "grad_norm": 3.7563531398773193, "learning_rate": 3.023914700976799e-07, "loss": 0.8986, "step": 11572 }, { "epoch": 0.8921523280912735, "grad_norm": 3.557497024536133, "learning_rate": 3.019640366349641e-07, "loss": 0.9661, "step": 11573 }, { "epoch": 0.8922294172062905, "grad_norm": 3.693223476409912, "learning_rate": 3.015368960704584e-07, "loss": 0.8886, "step": 11574 }, { "epoch": 0.8923065063213075, "grad_norm": 3.728806972503662, "learning_rate": 3.0111004843079327e-07, "loss": 0.8569, "step": 11575 }, { "epoch": 0.8923835954363244, "grad_norm": 3.8139472007751465, "learning_rate": 3.0068349374258175e-07, "loss": 0.8939, "step": 11576 }, { "epoch": 0.8924606845513413, "grad_norm": 
3.815910577774048, "learning_rate": 3.002572320324143e-07, "loss": 0.8666, "step": 11577 }, { "epoch": 0.8925377736663583, "grad_norm": 3.8948774337768555, "learning_rate": 2.99831263326868e-07, "loss": 0.9402, "step": 11578 }, { "epoch": 0.8926148627813753, "grad_norm": 4.020652770996094, "learning_rate": 2.994055876525004e-07, "loss": 1.011, "step": 11579 }, { "epoch": 0.8926919518963923, "grad_norm": 3.6577086448669434, "learning_rate": 2.9898020503584977e-07, "loss": 0.9003, "step": 11580 }, { "epoch": 0.8927690410114092, "grad_norm": 3.762552499771118, "learning_rate": 2.985551155034355e-07, "loss": 0.9369, "step": 11581 }, { "epoch": 0.8928461301264261, "grad_norm": 3.9927804470062256, "learning_rate": 2.9813031908176025e-07, "loss": 0.8735, "step": 11582 }, { "epoch": 0.8929232192414431, "grad_norm": 3.5767393112182617, "learning_rate": 2.977058157973084e-07, "loss": 0.9675, "step": 11583 }, { "epoch": 0.89300030835646, "grad_norm": 3.9472174644470215, "learning_rate": 2.9728160567654483e-07, "loss": 0.9394, "step": 11584 }, { "epoch": 0.8930773974714771, "grad_norm": 3.4129271507263184, "learning_rate": 2.968576887459168e-07, "loss": 0.7976, "step": 11585 }, { "epoch": 0.893154486586494, "grad_norm": 3.768298864364624, "learning_rate": 2.964340650318548e-07, "loss": 0.9286, "step": 11586 }, { "epoch": 0.8932315757015109, "grad_norm": 3.6881837844848633, "learning_rate": 2.9601073456076766e-07, "loss": 0.8934, "step": 11587 }, { "epoch": 0.8933086648165279, "grad_norm": 3.6824235916137695, "learning_rate": 2.955876973590488e-07, "loss": 0.9171, "step": 11588 }, { "epoch": 0.8933857539315448, "grad_norm": 3.3715460300445557, "learning_rate": 2.9516495345307207e-07, "loss": 0.8354, "step": 11589 }, { "epoch": 0.8934628430465619, "grad_norm": 3.638155698776245, "learning_rate": 2.947425028691936e-07, "loss": 0.8242, "step": 11590 }, { "epoch": 0.8935399321615788, "grad_norm": 3.8177831172943115, "learning_rate": 2.943203456337512e-07, "loss": 1.0496, "step": 11591 }, { "epoch": 0.8936170212765957, "grad_norm": 3.5661537647247314, "learning_rate": 2.938984817730639e-07, "loss": 0.8795, "step": 11592 }, { "epoch": 0.8936941103916127, "grad_norm": 3.6265878677368164, "learning_rate": 2.9347691131343223e-07, "loss": 0.95, "step": 11593 }, { "epoch": 0.8937711995066296, "grad_norm": 3.493656635284424, "learning_rate": 2.930556342811397e-07, "loss": 0.8027, "step": 11594 }, { "epoch": 0.8938482886216467, "grad_norm": 3.3560476303100586, "learning_rate": 2.926346507024502e-07, "loss": 0.8137, "step": 11595 }, { "epoch": 0.8939253777366636, "grad_norm": 3.855767011642456, "learning_rate": 2.922139606036106e-07, "loss": 0.9452, "step": 11596 }, { "epoch": 0.8940024668516805, "grad_norm": 4.110727787017822, "learning_rate": 2.917935640108488e-07, "loss": 1.0172, "step": 11597 }, { "epoch": 0.8940795559666975, "grad_norm": 3.6166462898254395, "learning_rate": 2.9137346095037324e-07, "loss": 0.9122, "step": 11598 }, { "epoch": 0.8941566450817144, "grad_norm": 3.6462063789367676, "learning_rate": 2.909536514483752e-07, "loss": 0.831, "step": 11599 }, { "epoch": 0.8942337341967315, "grad_norm": 3.6715078353881836, "learning_rate": 2.9053413553102874e-07, "loss": 0.8714, "step": 11600 }, { "epoch": 0.8943108233117484, "grad_norm": 3.616772174835205, "learning_rate": 2.901149132244879e-07, "loss": 1.0388, "step": 11601 }, { "epoch": 0.8943879124267653, "grad_norm": 4.153509616851807, "learning_rate": 2.896959845548902e-07, "loss": 0.8361, "step": 11602 }, { "epoch": 0.8944650015417823, "grad_norm": 
3.7695071697235107, "learning_rate": 2.892773495483514e-07, "loss": 0.9392, "step": 11603 }, { "epoch": 0.8945420906567992, "grad_norm": 3.27496600151062, "learning_rate": 2.8885900823097223e-07, "loss": 0.8339, "step": 11604 }, { "epoch": 0.8946191797718163, "grad_norm": 4.047286510467529, "learning_rate": 2.8844096062883466e-07, "loss": 0.9914, "step": 11605 }, { "epoch": 0.8946962688868332, "grad_norm": 3.8037588596343994, "learning_rate": 2.880232067680011e-07, "loss": 0.8849, "step": 11606 }, { "epoch": 0.8947733580018501, "grad_norm": 3.4862048625946045, "learning_rate": 2.876057466745169e-07, "loss": 0.8648, "step": 11607 }, { "epoch": 0.8948504471168671, "grad_norm": 3.446492910385132, "learning_rate": 2.87188580374409e-07, "loss": 0.8775, "step": 11608 }, { "epoch": 0.894927536231884, "grad_norm": 3.7354066371917725, "learning_rate": 2.867717078936838e-07, "loss": 0.9698, "step": 11609 }, { "epoch": 0.8950046253469011, "grad_norm": 4.134057998657227, "learning_rate": 2.863551292583322e-07, "loss": 0.9169, "step": 11610 }, { "epoch": 0.895081714461918, "grad_norm": 3.7595643997192383, "learning_rate": 2.8593884449432617e-07, "loss": 0.9883, "step": 11611 }, { "epoch": 0.8951588035769349, "grad_norm": 3.875776529312134, "learning_rate": 2.8552285362761833e-07, "loss": 0.985, "step": 11612 }, { "epoch": 0.8952358926919519, "grad_norm": 3.6377480030059814, "learning_rate": 2.85107156684144e-07, "loss": 0.8584, "step": 11613 }, { "epoch": 0.8953129818069688, "grad_norm": 3.419172763824463, "learning_rate": 2.846917536898192e-07, "loss": 0.8829, "step": 11614 }, { "epoch": 0.8953900709219859, "grad_norm": 3.9442739486694336, "learning_rate": 2.8427664467054194e-07, "loss": 0.9979, "step": 11615 }, { "epoch": 0.8954671600370028, "grad_norm": 3.6538686752319336, "learning_rate": 2.838618296521922e-07, "loss": 0.9496, "step": 11616 }, { "epoch": 0.8955442491520197, "grad_norm": 3.4656448364257812, "learning_rate": 2.8344730866063264e-07, "loss": 0.7275, "step": 11617 }, { "epoch": 0.8956213382670367, "grad_norm": 3.758117437362671, "learning_rate": 2.8303308172170587e-07, "loss": 0.8674, "step": 11618 }, { "epoch": 0.8956984273820536, "grad_norm": 3.560523271560669, "learning_rate": 2.826191488612362e-07, "loss": 0.8544, "step": 11619 }, { "epoch": 0.8957755164970707, "grad_norm": 3.549065351486206, "learning_rate": 2.822055101050308e-07, "loss": 0.8741, "step": 11620 }, { "epoch": 0.8958526056120876, "grad_norm": 3.7114226818084717, "learning_rate": 2.817921654788769e-07, "loss": 0.9257, "step": 11621 }, { "epoch": 0.8959296947271045, "grad_norm": 4.023944854736328, "learning_rate": 2.813791150085454e-07, "loss": 1.1294, "step": 11622 }, { "epoch": 0.8960067838421215, "grad_norm": 3.9397988319396973, "learning_rate": 2.8096635871978915e-07, "loss": 0.906, "step": 11623 }, { "epoch": 0.8960838729571384, "grad_norm": 3.5151617527008057, "learning_rate": 2.8055389663833923e-07, "loss": 0.9842, "step": 11624 }, { "epoch": 0.8961609620721555, "grad_norm": 3.4286890029907227, "learning_rate": 2.801417287899111e-07, "loss": 0.9019, "step": 11625 }, { "epoch": 0.8962380511871724, "grad_norm": 3.755769968032837, "learning_rate": 2.7972985520020093e-07, "loss": 0.9232, "step": 11626 }, { "epoch": 0.8963151403021893, "grad_norm": 3.760594606399536, "learning_rate": 2.793182758948881e-07, "loss": 0.8456, "step": 11627 }, { "epoch": 0.8963922294172063, "grad_norm": 3.722588300704956, "learning_rate": 2.7890699089963225e-07, "loss": 0.923, "step": 11628 }, { "epoch": 0.8964693185322232, "grad_norm": 
3.614448070526123, "learning_rate": 2.784960002400733e-07, "loss": 1.0738, "step": 11629 }, { "epoch": 0.8965464076472403, "grad_norm": 3.7189695835113525, "learning_rate": 2.7808530394183577e-07, "loss": 0.9993, "step": 11630 }, { "epoch": 0.8966234967622572, "grad_norm": 3.6947999000549316, "learning_rate": 2.776749020305236e-07, "loss": 0.8291, "step": 11631 }, { "epoch": 0.8967005858772741, "grad_norm": 3.6183085441589355, "learning_rate": 2.7726479453172415e-07, "loss": 0.7955, "step": 11632 }, { "epoch": 0.8967776749922911, "grad_norm": 3.457646608352661, "learning_rate": 2.7685498147100533e-07, "loss": 0.8498, "step": 11633 }, { "epoch": 0.896854764107308, "grad_norm": 3.7649595737457275, "learning_rate": 2.7644546287391716e-07, "loss": 0.8975, "step": 11634 }, { "epoch": 0.896931853222325, "grad_norm": 3.469334840774536, "learning_rate": 2.760362387659893e-07, "loss": 0.8225, "step": 11635 }, { "epoch": 0.897008942337342, "grad_norm": 3.689354181289673, "learning_rate": 2.756273091727363e-07, "loss": 0.9019, "step": 11636 }, { "epoch": 0.8970860314523589, "grad_norm": 3.99678635597229, "learning_rate": 2.7521867411965273e-07, "loss": 0.9491, "step": 11637 }, { "epoch": 0.8971631205673759, "grad_norm": 3.96669602394104, "learning_rate": 2.7481033363221385e-07, "loss": 0.8561, "step": 11638 }, { "epoch": 0.8972402096823928, "grad_norm": 3.8000166416168213, "learning_rate": 2.744022877358793e-07, "loss": 0.9234, "step": 11639 }, { "epoch": 0.8973172987974098, "grad_norm": 3.587712287902832, "learning_rate": 2.73994536456087e-07, "loss": 0.905, "step": 11640 }, { "epoch": 0.8973943879124268, "grad_norm": 3.3833603858947754, "learning_rate": 2.735870798182588e-07, "loss": 0.8504, "step": 11641 }, { "epoch": 0.8974714770274437, "grad_norm": 3.685027837753296, "learning_rate": 2.7317991784779727e-07, "loss": 0.9113, "step": 11642 }, { "epoch": 0.8975485661424607, "grad_norm": 3.7003281116485596, "learning_rate": 2.727730505700871e-07, "loss": 0.8788, "step": 11643 }, { "epoch": 0.8976256552574776, "grad_norm": 3.5367331504821777, "learning_rate": 2.7236647801049456e-07, "loss": 0.879, "step": 11644 }, { "epoch": 0.8977027443724946, "grad_norm": 4.164783000946045, "learning_rate": 2.719602001943672e-07, "loss": 1.0591, "step": 11645 }, { "epoch": 0.8977798334875116, "grad_norm": 3.9324138164520264, "learning_rate": 2.7155421714703424e-07, "loss": 0.9406, "step": 11646 }, { "epoch": 0.8978569226025285, "grad_norm": 4.007849216461182, "learning_rate": 2.7114852889380594e-07, "loss": 0.9425, "step": 11647 }, { "epoch": 0.8979340117175455, "grad_norm": 3.8484272956848145, "learning_rate": 2.7074313545997546e-07, "loss": 0.9126, "step": 11648 }, { "epoch": 0.8980111008325624, "grad_norm": 4.0231804847717285, "learning_rate": 2.703380368708175e-07, "loss": 0.8856, "step": 11649 }, { "epoch": 0.8980881899475794, "grad_norm": 3.677838087081909, "learning_rate": 2.6993323315158804e-07, "loss": 0.9547, "step": 11650 }, { "epoch": 0.8981652790625964, "grad_norm": 3.767169237136841, "learning_rate": 2.6952872432752295e-07, "loss": 0.9004, "step": 11651 }, { "epoch": 0.8982423681776133, "grad_norm": 3.888305187225342, "learning_rate": 2.691245104238421e-07, "loss": 0.9836, "step": 11652 }, { "epoch": 0.8983194572926303, "grad_norm": 4.260769367218018, "learning_rate": 2.687205914657465e-07, "loss": 0.9681, "step": 11653 }, { "epoch": 0.8983965464076472, "grad_norm": 3.9282066822052, "learning_rate": 2.6831696747841804e-07, "loss": 0.8736, "step": 11654 }, { "epoch": 0.8984736355226642, "grad_norm": 
4.230199337005615, "learning_rate": 2.679136384870201e-07, "loss": 1.0049, "step": 11655 }, { "epoch": 0.8985507246376812, "grad_norm": 3.359044075012207, "learning_rate": 2.6751060451670033e-07, "loss": 0.7274, "step": 11656 }, { "epoch": 0.8986278137526981, "grad_norm": 3.813596487045288, "learning_rate": 2.671078655925829e-07, "loss": 0.8514, "step": 11657 }, { "epoch": 0.8987049028677151, "grad_norm": 3.9940125942230225, "learning_rate": 2.6670542173977745e-07, "loss": 0.9324, "step": 11658 }, { "epoch": 0.898781991982732, "grad_norm": 3.4712913036346436, "learning_rate": 2.6630327298337535e-07, "loss": 0.8621, "step": 11659 }, { "epoch": 0.898859081097749, "grad_norm": 3.4401824474334717, "learning_rate": 2.6590141934844715e-07, "loss": 0.7482, "step": 11660 }, { "epoch": 0.898936170212766, "grad_norm": 4.060207366943359, "learning_rate": 2.654998608600479e-07, "loss": 1.0239, "step": 11661 }, { "epoch": 0.8990132593277829, "grad_norm": 3.8618593215942383, "learning_rate": 2.6509859754321076e-07, "loss": 0.9923, "step": 11662 }, { "epoch": 0.8990903484427999, "grad_norm": 3.749800682067871, "learning_rate": 2.6469762942295363e-07, "loss": 0.9303, "step": 11663 }, { "epoch": 0.8991674375578168, "grad_norm": 3.6844890117645264, "learning_rate": 2.642969565242748e-07, "loss": 0.9534, "step": 11664 }, { "epoch": 0.8992445266728338, "grad_norm": 3.837385892868042, "learning_rate": 2.6389657887215314e-07, "loss": 0.9022, "step": 11665 }, { "epoch": 0.8993216157878507, "grad_norm": 3.7096149921417236, "learning_rate": 2.6349649649155153e-07, "loss": 0.9856, "step": 11666 }, { "epoch": 0.8993987049028677, "grad_norm": 4.130624771118164, "learning_rate": 2.6309670940741215e-07, "loss": 0.9756, "step": 11667 }, { "epoch": 0.8994757940178847, "grad_norm": 4.128146171569824, "learning_rate": 2.6269721764466016e-07, "loss": 0.9506, "step": 11668 }, { "epoch": 0.8995528831329016, "grad_norm": 3.8928797245025635, "learning_rate": 2.6229802122820114e-07, "loss": 0.9731, "step": 11669 }, { "epoch": 0.8996299722479186, "grad_norm": 4.215887069702148, "learning_rate": 2.618991201829235e-07, "loss": 0.937, "step": 11670 }, { "epoch": 0.8997070613629355, "grad_norm": 3.51582407951355, "learning_rate": 2.6150051453369684e-07, "loss": 0.8767, "step": 11671 }, { "epoch": 0.8997841504779525, "grad_norm": 3.87080979347229, "learning_rate": 2.6110220430537124e-07, "loss": 0.866, "step": 11672 }, { "epoch": 0.8998612395929695, "grad_norm": 3.93489670753479, "learning_rate": 2.607041895227802e-07, "loss": 0.9668, "step": 11673 }, { "epoch": 0.8999383287079864, "grad_norm": 3.6503822803497314, "learning_rate": 2.603064702107372e-07, "loss": 0.8732, "step": 11674 }, { "epoch": 0.9000154178230034, "grad_norm": 3.783892869949341, "learning_rate": 2.599090463940385e-07, "loss": 0.9098, "step": 11675 }, { "epoch": 0.9000925069380203, "grad_norm": 3.5961296558380127, "learning_rate": 2.5951191809746146e-07, "loss": 0.8894, "step": 11676 }, { "epoch": 0.9001695960530373, "grad_norm": 3.5369625091552734, "learning_rate": 2.591150853457641e-07, "loss": 0.8137, "step": 11677 }, { "epoch": 0.9002466851680543, "grad_norm": 3.896723508834839, "learning_rate": 2.587185481636878e-07, "loss": 0.8884, "step": 11678 }, { "epoch": 0.9003237742830712, "grad_norm": 3.630908250808716, "learning_rate": 2.583223065759538e-07, "loss": 0.9381, "step": 11679 }, { "epoch": 0.9004008633980882, "grad_norm": 3.8402626514434814, "learning_rate": 2.5792636060726684e-07, "loss": 0.7879, "step": 11680 }, { "epoch": 0.9004779525131051, "grad_norm": 
3.4314825534820557, "learning_rate": 2.5753071028231104e-07, "loss": 0.7755, "step": 11681 }, { "epoch": 0.900555041628122, "grad_norm": 3.852926015853882, "learning_rate": 2.571353556257544e-07, "loss": 0.8611, "step": 11682 }, { "epoch": 0.9006321307431391, "grad_norm": 3.5122294425964355, "learning_rate": 2.56740296662244e-07, "loss": 0.8702, "step": 11683 }, { "epoch": 0.900709219858156, "grad_norm": 3.657466411590576, "learning_rate": 2.5634553341640943e-07, "loss": 0.8854, "step": 11684 }, { "epoch": 0.900786308973173, "grad_norm": 3.6080384254455566, "learning_rate": 2.5595106591286335e-07, "loss": 0.8445, "step": 11685 }, { "epoch": 0.9008633980881899, "grad_norm": 3.5531303882598877, "learning_rate": 2.555568941761982e-07, "loss": 0.8449, "step": 11686 }, { "epoch": 0.9009404872032069, "grad_norm": 3.68255352973938, "learning_rate": 2.5516301823098944e-07, "loss": 0.9405, "step": 11687 }, { "epoch": 0.9010175763182239, "grad_norm": 3.9351344108581543, "learning_rate": 2.547694381017912e-07, "loss": 0.8447, "step": 11688 }, { "epoch": 0.9010946654332408, "grad_norm": 4.277076244354248, "learning_rate": 2.5437615381314284e-07, "loss": 0.8522, "step": 11689 }, { "epoch": 0.9011717545482578, "grad_norm": 3.9808146953582764, "learning_rate": 2.5398316538956246e-07, "loss": 0.8612, "step": 11690 }, { "epoch": 0.9012488436632747, "grad_norm": 3.763561725616455, "learning_rate": 2.5359047285555214e-07, "loss": 0.9626, "step": 11691 }, { "epoch": 0.9013259327782918, "grad_norm": 3.6474595069885254, "learning_rate": 2.5319807623559287e-07, "loss": 0.9554, "step": 11692 }, { "epoch": 0.9014030218933087, "grad_norm": 4.234410762786865, "learning_rate": 2.5280597555415067e-07, "loss": 0.8349, "step": 11693 }, { "epoch": 0.9014801110083256, "grad_norm": 3.9878194332122803, "learning_rate": 2.524141708356681e-07, "loss": 0.7692, "step": 11694 }, { "epoch": 0.9015572001233426, "grad_norm": 3.9393675327301025, "learning_rate": 2.52022662104574e-07, "loss": 0.9336, "step": 11695 }, { "epoch": 0.9016342892383595, "grad_norm": 3.7222704887390137, "learning_rate": 2.5163144938527675e-07, "loss": 0.8467, "step": 11696 }, { "epoch": 0.9017113783533766, "grad_norm": 3.774393081665039, "learning_rate": 2.5124053270216553e-07, "loss": 0.9367, "step": 11697 }, { "epoch": 0.9017884674683935, "grad_norm": 4.058575630187988, "learning_rate": 2.5084991207961374e-07, "loss": 0.896, "step": 11698 }, { "epoch": 0.9018655565834104, "grad_norm": 3.4426233768463135, "learning_rate": 2.50459587541973e-07, "loss": 0.8279, "step": 11699 }, { "epoch": 0.9019426456984274, "grad_norm": 3.468212842941284, "learning_rate": 2.5006955911357823e-07, "loss": 0.8706, "step": 11700 }, { "epoch": 0.9020197348134443, "grad_norm": 3.775519609451294, "learning_rate": 2.496798268187456e-07, "loss": 0.8586, "step": 11701 }, { "epoch": 0.9020968239284614, "grad_norm": 3.4390523433685303, "learning_rate": 2.492903906817734e-07, "loss": 0.781, "step": 11702 }, { "epoch": 0.9021739130434783, "grad_norm": 3.695444107055664, "learning_rate": 2.4890125072694114e-07, "loss": 0.9408, "step": 11703 }, { "epoch": 0.9022510021584952, "grad_norm": 3.9369289875030518, "learning_rate": 2.4851240697850997e-07, "loss": 0.8868, "step": 11704 }, { "epoch": 0.9023280912735122, "grad_norm": 3.8416519165039062, "learning_rate": 2.481238594607205e-07, "loss": 0.8003, "step": 11705 }, { "epoch": 0.9024051803885291, "grad_norm": 4.129755020141602, "learning_rate": 2.477356081977983e-07, "loss": 0.9659, "step": 11706 }, { "epoch": 0.9024822695035462, 
"grad_norm": 4.082637786865234, "learning_rate": 2.4734765321394793e-07, "loss": 0.9412, "step": 11707 }, { "epoch": 0.9025593586185631, "grad_norm": 3.7622687816619873, "learning_rate": 2.469599945333567e-07, "loss": 0.9492, "step": 11708 }, { "epoch": 0.90263644773358, "grad_norm": 4.142516136169434, "learning_rate": 2.465726321801942e-07, "loss": 1.0039, "step": 11709 }, { "epoch": 0.902713536848597, "grad_norm": 3.67720103263855, "learning_rate": 2.4618556617860777e-07, "loss": 0.9048, "step": 11710 }, { "epoch": 0.9027906259636139, "grad_norm": 3.787903308868408, "learning_rate": 2.45798796552732e-07, "loss": 0.8038, "step": 11711 }, { "epoch": 0.902867715078631, "grad_norm": 4.044528007507324, "learning_rate": 2.454123233266781e-07, "loss": 0.9192, "step": 11712 }, { "epoch": 0.9029448041936479, "grad_norm": 3.6001529693603516, "learning_rate": 2.4502614652454184e-07, "loss": 0.8692, "step": 11713 }, { "epoch": 0.9030218933086648, "grad_norm": 3.8225014209747314, "learning_rate": 2.4464026617039784e-07, "loss": 0.871, "step": 11714 }, { "epoch": 0.9030989824236818, "grad_norm": 3.8792974948883057, "learning_rate": 2.4425468228830516e-07, "loss": 0.808, "step": 11715 }, { "epoch": 0.9031760715386987, "grad_norm": 3.640425682067871, "learning_rate": 2.438693949023019e-07, "loss": 0.7728, "step": 11716 }, { "epoch": 0.9032531606537157, "grad_norm": 4.195462703704834, "learning_rate": 2.434844040364098e-07, "loss": 0.9055, "step": 11717 }, { "epoch": 0.9033302497687327, "grad_norm": 3.6664235591888428, "learning_rate": 2.4309970971462984e-07, "loss": 0.8798, "step": 11718 }, { "epoch": 0.9034073388837496, "grad_norm": 3.623297691345215, "learning_rate": 2.427153119609477e-07, "loss": 0.8227, "step": 11719 }, { "epoch": 0.9034844279987666, "grad_norm": 3.899339199066162, "learning_rate": 2.4233121079932585e-07, "loss": 0.884, "step": 11720 }, { "epoch": 0.9035615171137835, "grad_norm": 3.89997935295105, "learning_rate": 2.4194740625371303e-07, "loss": 0.8831, "step": 11721 }, { "epoch": 0.9036386062288005, "grad_norm": 3.7729170322418213, "learning_rate": 2.4156389834803616e-07, "loss": 0.8071, "step": 11722 }, { "epoch": 0.9037156953438175, "grad_norm": 3.915086507797241, "learning_rate": 2.411806871062061e-07, "loss": 1.0403, "step": 11723 }, { "epoch": 0.9037927844588344, "grad_norm": 3.809762716293335, "learning_rate": 2.4079777255211434e-07, "loss": 0.864, "step": 11724 }, { "epoch": 0.9038698735738514, "grad_norm": 4.3212714195251465, "learning_rate": 2.4041515470963227e-07, "loss": 1.0022, "step": 11725 }, { "epoch": 0.9039469626888683, "grad_norm": 3.7529261112213135, "learning_rate": 2.400328336026148e-07, "loss": 0.9564, "step": 11726 }, { "epoch": 0.9040240518038853, "grad_norm": 3.535996675491333, "learning_rate": 2.396508092548977e-07, "loss": 0.8769, "step": 11727 }, { "epoch": 0.9041011409189023, "grad_norm": 3.646723747253418, "learning_rate": 2.392690816902976e-07, "loss": 0.8884, "step": 11728 }, { "epoch": 0.9041782300339192, "grad_norm": 4.039508819580078, "learning_rate": 2.3888765093261434e-07, "loss": 0.8506, "step": 11729 }, { "epoch": 0.9042553191489362, "grad_norm": 4.302793502807617, "learning_rate": 2.385065170056283e-07, "loss": 0.9042, "step": 11730 }, { "epoch": 0.9043324082639531, "grad_norm": 3.591334104537964, "learning_rate": 2.3812567993309943e-07, "loss": 0.7687, "step": 11731 }, { "epoch": 0.9044094973789701, "grad_norm": 3.6448166370391846, "learning_rate": 2.3774513973877254e-07, "loss": 0.8568, "step": 11732 }, { "epoch": 0.904486586493987, 
"grad_norm": 3.65342116355896, "learning_rate": 2.3736489644637152e-07, "loss": 0.8879, "step": 11733 }, { "epoch": 0.904563675609004, "grad_norm": 3.8169586658477783, "learning_rate": 2.3698495007960286e-07, "loss": 0.9232, "step": 11734 }, { "epoch": 0.904640764724021, "grad_norm": 3.728663921356201, "learning_rate": 2.3660530066215493e-07, "loss": 0.8023, "step": 11735 }, { "epoch": 0.9047178538390379, "grad_norm": 3.7461793422698975, "learning_rate": 2.3622594821769595e-07, "loss": 0.8047, "step": 11736 }, { "epoch": 0.9047949429540549, "grad_norm": 3.480088472366333, "learning_rate": 2.358468927698765e-07, "loss": 0.9529, "step": 11737 }, { "epoch": 0.9048720320690719, "grad_norm": 3.421665668487549, "learning_rate": 2.354681343423293e-07, "loss": 0.8599, "step": 11738 }, { "epoch": 0.9049491211840888, "grad_norm": 3.9536914825439453, "learning_rate": 2.350896729586677e-07, "loss": 0.7775, "step": 11739 }, { "epoch": 0.9050262102991058, "grad_norm": 3.6503090858459473, "learning_rate": 2.347115086424867e-07, "loss": 0.9699, "step": 11740 }, { "epoch": 0.9051032994141227, "grad_norm": 3.8761227130889893, "learning_rate": 2.3433364141736414e-07, "loss": 0.9433, "step": 11741 }, { "epoch": 0.9051803885291397, "grad_norm": 3.4743881225585938, "learning_rate": 2.3395607130685616e-07, "loss": 0.9262, "step": 11742 }, { "epoch": 0.9052574776441566, "grad_norm": 3.9404661655426025, "learning_rate": 2.3357879833450335e-07, "loss": 0.8681, "step": 11743 }, { "epoch": 0.9053345667591736, "grad_norm": 3.5697805881500244, "learning_rate": 2.332018225238264e-07, "loss": 0.7547, "step": 11744 }, { "epoch": 0.9054116558741906, "grad_norm": 3.835613965988159, "learning_rate": 2.3282514389832755e-07, "loss": 0.9139, "step": 11745 }, { "epoch": 0.9054887449892075, "grad_norm": 3.496504306793213, "learning_rate": 2.3244876248149196e-07, "loss": 0.9144, "step": 11746 }, { "epoch": 0.9055658341042245, "grad_norm": 3.6928272247314453, "learning_rate": 2.3207267829678416e-07, "loss": 0.8003, "step": 11747 }, { "epoch": 0.9056429232192414, "grad_norm": 3.711456537246704, "learning_rate": 2.3169689136765038e-07, "loss": 0.9611, "step": 11748 }, { "epoch": 0.9057200123342584, "grad_norm": 3.7150790691375732, "learning_rate": 2.3132140171752027e-07, "loss": 0.9735, "step": 11749 }, { "epoch": 0.9057971014492754, "grad_norm": 3.9875683784484863, "learning_rate": 2.3094620936980283e-07, "loss": 0.9415, "step": 11750 }, { "epoch": 0.9058741905642923, "grad_norm": 3.7568795680999756, "learning_rate": 2.3057131434788994e-07, "loss": 0.9143, "step": 11751 }, { "epoch": 0.9059512796793093, "grad_norm": 3.826282262802124, "learning_rate": 2.3019671667515454e-07, "loss": 0.9529, "step": 11752 }, { "epoch": 0.9060283687943262, "grad_norm": 3.865046262741089, "learning_rate": 2.2982241637494962e-07, "loss": 1.0432, "step": 11753 }, { "epoch": 0.9061054579093432, "grad_norm": 3.8025035858154297, "learning_rate": 2.2944841347061153e-07, "loss": 0.8933, "step": 11754 }, { "epoch": 0.9061825470243602, "grad_norm": 3.6848461627960205, "learning_rate": 2.2907470798545772e-07, "loss": 0.9039, "step": 11755 }, { "epoch": 0.9062596361393771, "grad_norm": 3.549743413925171, "learning_rate": 2.2870129994278733e-07, "loss": 0.9509, "step": 11756 }, { "epoch": 0.9063367252543941, "grad_norm": 3.480574607849121, "learning_rate": 2.2832818936587954e-07, "loss": 0.7861, "step": 11757 }, { "epoch": 0.906413814369411, "grad_norm": 3.917457342147827, "learning_rate": 2.2795537627799512e-07, "loss": 0.773, "step": 11758 }, { "epoch": 
0.906490903484428, "grad_norm": 3.638928174972534, "learning_rate": 2.2758286070237889e-07, "loss": 0.9302, "step": 11759 }, { "epoch": 0.906567992599445, "grad_norm": 3.471503257751465, "learning_rate": 2.2721064266225335e-07, "loss": 0.8307, "step": 11760 }, { "epoch": 0.9066450817144619, "grad_norm": 3.8338592052459717, "learning_rate": 2.2683872218082659e-07, "loss": 0.8046, "step": 11761 }, { "epoch": 0.9067221708294789, "grad_norm": 3.566448450088501, "learning_rate": 2.2646709928128397e-07, "loss": 0.8662, "step": 11762 }, { "epoch": 0.9067992599444958, "grad_norm": 3.907142162322998, "learning_rate": 2.2609577398679472e-07, "loss": 0.9882, "step": 11763 }, { "epoch": 0.9068763490595128, "grad_norm": 3.876858949661255, "learning_rate": 2.2572474632050977e-07, "loss": 0.8642, "step": 11764 }, { "epoch": 0.9069534381745298, "grad_norm": 3.5765106678009033, "learning_rate": 2.253540163055601e-07, "loss": 0.829, "step": 11765 }, { "epoch": 0.9070305272895467, "grad_norm": 4.412302494049072, "learning_rate": 2.249835839650588e-07, "loss": 1.0133, "step": 11766 }, { "epoch": 0.9071076164045637, "grad_norm": 3.776357412338257, "learning_rate": 2.2461344932210084e-07, "loss": 0.8659, "step": 11767 }, { "epoch": 0.9071847055195806, "grad_norm": 4.0795392990112305, "learning_rate": 2.2424361239976212e-07, "loss": 0.9052, "step": 11768 }, { "epoch": 0.9072617946345976, "grad_norm": 3.6457784175872803, "learning_rate": 2.2387407322109922e-07, "loss": 0.8973, "step": 11769 }, { "epoch": 0.9073388837496146, "grad_norm": 3.8819169998168945, "learning_rate": 2.2350483180915206e-07, "loss": 0.9255, "step": 11770 }, { "epoch": 0.9074159728646315, "grad_norm": 4.195042610168457, "learning_rate": 2.2313588818694055e-07, "loss": 0.9305, "step": 11771 }, { "epoch": 0.9074930619796485, "grad_norm": 3.5375936031341553, "learning_rate": 2.2276724237746684e-07, "loss": 0.9001, "step": 11772 }, { "epoch": 0.9075701510946654, "grad_norm": 4.148855209350586, "learning_rate": 2.223988944037131e-07, "loss": 0.9946, "step": 11773 }, { "epoch": 0.9076472402096823, "grad_norm": 3.680772304534912, "learning_rate": 2.220308442886443e-07, "loss": 0.8558, "step": 11774 }, { "epoch": 0.9077243293246994, "grad_norm": 3.458876848220825, "learning_rate": 2.2166309205520707e-07, "loss": 0.9057, "step": 11775 }, { "epoch": 0.9078014184397163, "grad_norm": 3.533604145050049, "learning_rate": 2.2129563772632755e-07, "loss": 0.8633, "step": 11776 }, { "epoch": 0.9078785075547333, "grad_norm": 4.154286861419678, "learning_rate": 2.2092848132491628e-07, "loss": 1.0149, "step": 11777 }, { "epoch": 0.9079555966697502, "grad_norm": 3.6534740924835205, "learning_rate": 2.205616228738633e-07, "loss": 0.7558, "step": 11778 }, { "epoch": 0.9080326857847671, "grad_norm": 3.837028741836548, "learning_rate": 2.201950623960386e-07, "loss": 0.9613, "step": 11779 }, { "epoch": 0.9081097748997842, "grad_norm": 3.6078686714172363, "learning_rate": 2.1982879991429728e-07, "loss": 0.8294, "step": 11780 }, { "epoch": 0.9081868640148011, "grad_norm": 3.634082794189453, "learning_rate": 2.1946283545147274e-07, "loss": 0.9769, "step": 11781 }, { "epoch": 0.9082639531298181, "grad_norm": 3.799694776535034, "learning_rate": 2.1909716903038114e-07, "loss": 0.945, "step": 11782 }, { "epoch": 0.908341042244835, "grad_norm": 3.5379395484924316, "learning_rate": 2.1873180067382095e-07, "loss": 0.8928, "step": 11783 }, { "epoch": 0.9084181313598519, "grad_norm": 3.9974184036254883, "learning_rate": 2.1836673040456947e-07, "loss": 0.946, "step": 11784 }, { 
"epoch": 0.908495220474869, "grad_norm": 3.915295124053955, "learning_rate": 2.1800195824538794e-07, "loss": 1.0305, "step": 11785 }, { "epoch": 0.9085723095898859, "grad_norm": 3.7968573570251465, "learning_rate": 2.1763748421901764e-07, "loss": 0.8681, "step": 11786 }, { "epoch": 0.9086493987049029, "grad_norm": 3.79858136177063, "learning_rate": 2.172733083481815e-07, "loss": 0.8694, "step": 11787 }, { "epoch": 0.9087264878199198, "grad_norm": 3.833800792694092, "learning_rate": 2.1690943065558412e-07, "loss": 0.9167, "step": 11788 }, { "epoch": 0.9088035769349367, "grad_norm": 3.841184616088867, "learning_rate": 2.1654585116391236e-07, "loss": 0.8676, "step": 11789 }, { "epoch": 0.9088806660499538, "grad_norm": 3.632209062576294, "learning_rate": 2.1618256989583197e-07, "loss": 0.9878, "step": 11790 }, { "epoch": 0.9089577551649707, "grad_norm": 4.1442742347717285, "learning_rate": 2.1581958687399206e-07, "loss": 0.9334, "step": 11791 }, { "epoch": 0.9090348442799877, "grad_norm": 3.917574167251587, "learning_rate": 2.1545690212102344e-07, "loss": 0.822, "step": 11792 }, { "epoch": 0.9091119333950046, "grad_norm": 4.023653507232666, "learning_rate": 2.1509451565953688e-07, "loss": 0.9154, "step": 11793 }, { "epoch": 0.9091890225100215, "grad_norm": 3.650171995162964, "learning_rate": 2.1473242751212653e-07, "loss": 0.7958, "step": 11794 }, { "epoch": 0.9092661116250386, "grad_norm": 3.726482391357422, "learning_rate": 2.143706377013649e-07, "loss": 0.9593, "step": 11795 }, { "epoch": 0.9093432007400555, "grad_norm": 3.3501317501068115, "learning_rate": 2.140091462498084e-07, "loss": 0.9107, "step": 11796 }, { "epoch": 0.9094202898550725, "grad_norm": 3.5928847789764404, "learning_rate": 2.1364795317999455e-07, "loss": 0.845, "step": 11797 }, { "epoch": 0.9094973789700894, "grad_norm": 3.925431489944458, "learning_rate": 2.13287058514442e-07, "loss": 0.9261, "step": 11798 }, { "epoch": 0.9095744680851063, "grad_norm": 4.184495449066162, "learning_rate": 2.1292646227564995e-07, "loss": 1.0248, "step": 11799 }, { "epoch": 0.9096515572001234, "grad_norm": 3.948833703994751, "learning_rate": 2.125661644860999e-07, "loss": 0.9499, "step": 11800 }, { "epoch": 0.9097286463151403, "grad_norm": 3.7556886672973633, "learning_rate": 2.1220616516825497e-07, "loss": 0.909, "step": 11801 }, { "epoch": 0.9098057354301573, "grad_norm": 3.4662973880767822, "learning_rate": 2.1184646434455947e-07, "loss": 0.9219, "step": 11802 }, { "epoch": 0.9098828245451742, "grad_norm": 3.4703729152679443, "learning_rate": 2.1148706203743762e-07, "loss": 0.8062, "step": 11803 }, { "epoch": 0.9099599136601911, "grad_norm": 3.3203537464141846, "learning_rate": 2.111279582692982e-07, "loss": 0.8091, "step": 11804 }, { "epoch": 0.9100370027752082, "grad_norm": 3.9638702869415283, "learning_rate": 2.1076915306252776e-07, "loss": 0.9664, "step": 11805 }, { "epoch": 0.9101140918902251, "grad_norm": 3.615192174911499, "learning_rate": 2.104106464394967e-07, "loss": 0.8336, "step": 11806 }, { "epoch": 0.9101911810052421, "grad_norm": 4.019468784332275, "learning_rate": 2.1005243842255552e-07, "loss": 0.9677, "step": 11807 }, { "epoch": 0.910268270120259, "grad_norm": 3.62318754196167, "learning_rate": 2.0969452903403742e-07, "loss": 0.902, "step": 11808 }, { "epoch": 0.9103453592352759, "grad_norm": 3.653689384460449, "learning_rate": 2.0933691829625624e-07, "loss": 0.903, "step": 11809 }, { "epoch": 0.910422448350293, "grad_norm": 3.717650890350342, "learning_rate": 2.0897960623150581e-07, "loss": 0.9309, "step": 11810 }, { 
"epoch": 0.9104995374653099, "grad_norm": 3.7195537090301514, "learning_rate": 2.0862259286206387e-07, "loss": 0.9048, "step": 11811 }, { "epoch": 0.9105766265803269, "grad_norm": 3.824012279510498, "learning_rate": 2.0826587821018818e-07, "loss": 0.8867, "step": 11812 }, { "epoch": 0.9106537156953438, "grad_norm": 3.8283004760742188, "learning_rate": 2.0790946229811758e-07, "loss": 0.789, "step": 11813 }, { "epoch": 0.9107308048103607, "grad_norm": 3.8848516941070557, "learning_rate": 2.0755334514807379e-07, "loss": 0.8425, "step": 11814 }, { "epoch": 0.9108078939253778, "grad_norm": 3.4980616569519043, "learning_rate": 2.0719752678225846e-07, "loss": 0.849, "step": 11815 }, { "epoch": 0.9108849830403947, "grad_norm": 3.96785044670105, "learning_rate": 2.0684200722285386e-07, "loss": 0.8735, "step": 11816 }, { "epoch": 0.9109620721554117, "grad_norm": 3.910421133041382, "learning_rate": 2.0648678649202615e-07, "loss": 1.0155, "step": 11817 }, { "epoch": 0.9110391612704286, "grad_norm": 3.8571994304656982, "learning_rate": 2.0613186461192092e-07, "loss": 0.7947, "step": 11818 }, { "epoch": 0.9111162503854455, "grad_norm": 3.5077316761016846, "learning_rate": 2.057772416046655e-07, "loss": 0.8644, "step": 11819 }, { "epoch": 0.9111933395004626, "grad_norm": 4.075991153717041, "learning_rate": 2.0542291749237053e-07, "loss": 0.8711, "step": 11820 }, { "epoch": 0.9112704286154795, "grad_norm": 3.9802606105804443, "learning_rate": 2.050688922971239e-07, "loss": 0.7961, "step": 11821 }, { "epoch": 0.9113475177304965, "grad_norm": 4.114253520965576, "learning_rate": 2.0471516604099794e-07, "loss": 0.8708, "step": 11822 }, { "epoch": 0.9114246068455134, "grad_norm": 3.611233711242676, "learning_rate": 2.0436173874604615e-07, "loss": 0.7748, "step": 11823 }, { "epoch": 0.9115016959605303, "grad_norm": 3.610714912414551, "learning_rate": 2.0400861043430254e-07, "loss": 0.6573, "step": 11824 }, { "epoch": 0.9115787850755473, "grad_norm": 3.904794216156006, "learning_rate": 2.036557811277834e-07, "loss": 0.9248, "step": 11825 }, { "epoch": 0.9116558741905643, "grad_norm": 3.6194839477539062, "learning_rate": 2.0330325084848612e-07, "loss": 0.837, "step": 11826 }, { "epoch": 0.9117329633055813, "grad_norm": 3.733680248260498, "learning_rate": 2.029510196183876e-07, "loss": 0.8843, "step": 11827 }, { "epoch": 0.9118100524205982, "grad_norm": 3.8289010524749756, "learning_rate": 2.02599087459448e-07, "loss": 0.9078, "step": 11828 }, { "epoch": 0.9118871415356151, "grad_norm": 3.486313581466675, "learning_rate": 2.0224745439360928e-07, "loss": 0.8279, "step": 11829 }, { "epoch": 0.9119642306506321, "grad_norm": 3.5716099739074707, "learning_rate": 2.0189612044279384e-07, "loss": 0.9217, "step": 11830 }, { "epoch": 0.9120413197656491, "grad_norm": 3.4846935272216797, "learning_rate": 2.015450856289053e-07, "loss": 0.7435, "step": 11831 }, { "epoch": 0.9121184088806661, "grad_norm": 3.8818373680114746, "learning_rate": 2.0119434997382893e-07, "loss": 0.9561, "step": 11832 }, { "epoch": 0.912195497995683, "grad_norm": 3.465576171875, "learning_rate": 2.0084391349943055e-07, "loss": 0.8902, "step": 11833 }, { "epoch": 0.9122725871106999, "grad_norm": 4.149135589599609, "learning_rate": 2.0049377622755885e-07, "loss": 0.9379, "step": 11834 }, { "epoch": 0.9123496762257169, "grad_norm": 4.032505512237549, "learning_rate": 2.0014393818004295e-07, "loss": 0.8065, "step": 11835 }, { "epoch": 0.9124267653407339, "grad_norm": 4.063005447387695, "learning_rate": 1.9979439937869383e-07, "loss": 1.0096, "step": 11836 
}, { "epoch": 0.9125038544557509, "grad_norm": 3.7113261222839355, "learning_rate": 1.9944515984530343e-07, "loss": 0.9335, "step": 11837 }, { "epoch": 0.9125809435707678, "grad_norm": 4.403497219085693, "learning_rate": 1.9909621960164382e-07, "loss": 0.9352, "step": 11838 }, { "epoch": 0.9126580326857847, "grad_norm": 3.810023069381714, "learning_rate": 1.9874757866947036e-07, "loss": 0.9586, "step": 11839 }, { "epoch": 0.9127351218008017, "grad_norm": 3.5744166374206543, "learning_rate": 1.98399237070519e-07, "loss": 0.8838, "step": 11840 }, { "epoch": 0.9128122109158187, "grad_norm": 3.613818407058716, "learning_rate": 1.9805119482650737e-07, "loss": 0.8913, "step": 11841 }, { "epoch": 0.9128893000308357, "grad_norm": 3.6662750244140625, "learning_rate": 1.977034519591342e-07, "loss": 0.9187, "step": 11842 }, { "epoch": 0.9129663891458526, "grad_norm": 4.007206439971924, "learning_rate": 1.9735600849007774e-07, "loss": 0.8897, "step": 11843 }, { "epoch": 0.9130434782608695, "grad_norm": 4.291807651519775, "learning_rate": 1.970088644410012e-07, "loss": 0.8999, "step": 11844 }, { "epoch": 0.9131205673758865, "grad_norm": 3.8497557640075684, "learning_rate": 1.966620198335467e-07, "loss": 0.8368, "step": 11845 }, { "epoch": 0.9131976564909035, "grad_norm": 3.969374179840088, "learning_rate": 1.963154746893392e-07, "loss": 0.8243, "step": 11846 }, { "epoch": 0.9132747456059205, "grad_norm": 3.7255005836486816, "learning_rate": 1.9596922902998193e-07, "loss": 0.8739, "step": 11847 }, { "epoch": 0.9133518347209374, "grad_norm": 3.940896511077881, "learning_rate": 1.956232828770621e-07, "loss": 0.8913, "step": 11848 }, { "epoch": 0.9134289238359543, "grad_norm": 3.7923643589019775, "learning_rate": 1.9527763625214857e-07, "loss": 0.906, "step": 11849 }, { "epoch": 0.9135060129509713, "grad_norm": 3.9090356826782227, "learning_rate": 1.9493228917679018e-07, "loss": 0.9555, "step": 11850 }, { "epoch": 0.9135831020659883, "grad_norm": 3.677847146987915, "learning_rate": 1.9458724167251753e-07, "loss": 0.8394, "step": 11851 }, { "epoch": 0.9136601911810053, "grad_norm": 3.4867942333221436, "learning_rate": 1.942424937608428e-07, "loss": 0.8895, "step": 11852 }, { "epoch": 0.9137372802960222, "grad_norm": 3.678907632827759, "learning_rate": 1.9389804546325885e-07, "loss": 0.832, "step": 11853 }, { "epoch": 0.9138143694110391, "grad_norm": 3.7241756916046143, "learning_rate": 1.935538968012396e-07, "loss": 0.8667, "step": 11854 }, { "epoch": 0.9138914585260561, "grad_norm": 3.8677916526794434, "learning_rate": 1.9321004779624232e-07, "loss": 0.8854, "step": 11855 }, { "epoch": 0.913968547641073, "grad_norm": 3.4867103099823, "learning_rate": 1.9286649846970318e-07, "loss": 0.8591, "step": 11856 }, { "epoch": 0.9140456367560901, "grad_norm": 3.9573867321014404, "learning_rate": 1.9252324884304174e-07, "loss": 0.8497, "step": 11857 }, { "epoch": 0.914122725871107, "grad_norm": 3.7366578578948975, "learning_rate": 1.9218029893765643e-07, "loss": 0.9457, "step": 11858 }, { "epoch": 0.9141998149861239, "grad_norm": 3.685131549835205, "learning_rate": 1.9183764877492905e-07, "loss": 0.8712, "step": 11859 }, { "epoch": 0.9142769041011409, "grad_norm": 3.735658645629883, "learning_rate": 1.914952983762225e-07, "loss": 0.8748, "step": 11860 }, { "epoch": 0.9143539932161578, "grad_norm": 4.044561386108398, "learning_rate": 1.9115324776288024e-07, "loss": 0.938, "step": 11861 }, { "epoch": 0.9144310823311749, "grad_norm": 3.9689042568206787, "learning_rate": 1.9081149695622693e-07, "loss": 0.8393, "step": 
11862 }, { "epoch": 0.9145081714461918, "grad_norm": 3.726811170578003, "learning_rate": 1.9047004597757045e-07, "loss": 0.7973, "step": 11863 }, { "epoch": 0.9145852605612088, "grad_norm": 3.691999673843384, "learning_rate": 1.9012889484819665e-07, "loss": 0.8179, "step": 11864 }, { "epoch": 0.9146623496762257, "grad_norm": 3.436737298965454, "learning_rate": 1.8978804358937508e-07, "loss": 0.8289, "step": 11865 }, { "epoch": 0.9147394387912426, "grad_norm": 4.035014629364014, "learning_rate": 1.8944749222235658e-07, "loss": 0.9811, "step": 11866 }, { "epoch": 0.9148165279062597, "grad_norm": 3.6229660511016846, "learning_rate": 1.8910724076837196e-07, "loss": 0.9873, "step": 11867 }, { "epoch": 0.9148936170212766, "grad_norm": 3.7031772136688232, "learning_rate": 1.887672892486353e-07, "loss": 1.0282, "step": 11868 }, { "epoch": 0.9149707061362936, "grad_norm": 3.981867551803589, "learning_rate": 1.8842763768434024e-07, "loss": 0.9978, "step": 11869 }, { "epoch": 0.9150477952513105, "grad_norm": 3.8303933143615723, "learning_rate": 1.880882860966615e-07, "loss": 0.9158, "step": 11870 }, { "epoch": 0.9151248843663274, "grad_norm": 3.6401233673095703, "learning_rate": 1.8774923450675665e-07, "loss": 0.8627, "step": 11871 }, { "epoch": 0.9152019734813445, "grad_norm": 3.968207359313965, "learning_rate": 1.8741048293576424e-07, "loss": 0.9463, "step": 11872 }, { "epoch": 0.9152790625963614, "grad_norm": 3.688127040863037, "learning_rate": 1.8707203140480245e-07, "loss": 0.9945, "step": 11873 }, { "epoch": 0.9153561517113784, "grad_norm": 3.7214889526367188, "learning_rate": 1.8673387993497383e-07, "loss": 0.8459, "step": 11874 }, { "epoch": 0.9154332408263953, "grad_norm": 3.942366600036621, "learning_rate": 1.8639602854735872e-07, "loss": 0.7865, "step": 11875 }, { "epoch": 0.9155103299414122, "grad_norm": 3.5286405086517334, "learning_rate": 1.8605847726302085e-07, "loss": 0.9035, "step": 11876 }, { "epoch": 0.9155874190564293, "grad_norm": 3.608853816986084, "learning_rate": 1.8572122610300447e-07, "loss": 0.8938, "step": 11877 }, { "epoch": 0.9156645081714462, "grad_norm": 3.8086750507354736, "learning_rate": 1.8538427508833612e-07, "loss": 0.8832, "step": 11878 }, { "epoch": 0.9157415972864632, "grad_norm": 3.3695833683013916, "learning_rate": 1.8504762424002342e-07, "loss": 0.9145, "step": 11879 }, { "epoch": 0.9158186864014801, "grad_norm": 3.7012627124786377, "learning_rate": 1.8471127357905348e-07, "loss": 0.9761, "step": 11880 }, { "epoch": 0.915895775516497, "grad_norm": 3.448864459991455, "learning_rate": 1.843752231263962e-07, "loss": 0.7754, "step": 11881 }, { "epoch": 0.9159728646315141, "grad_norm": 3.5888924598693848, "learning_rate": 1.8403947290300318e-07, "loss": 0.936, "step": 11882 }, { "epoch": 0.916049953746531, "grad_norm": 3.8859617710113525, "learning_rate": 1.8370402292980703e-07, "loss": 0.9449, "step": 11883 }, { "epoch": 0.916127042861548, "grad_norm": 3.799428939819336, "learning_rate": 1.8336887322772e-07, "loss": 0.8585, "step": 11884 }, { "epoch": 0.9162041319765649, "grad_norm": 3.7906007766723633, "learning_rate": 1.8303402381763924e-07, "loss": 0.9822, "step": 11885 }, { "epoch": 0.9162812210915818, "grad_norm": 3.4046058654785156, "learning_rate": 1.8269947472043804e-07, "loss": 0.8123, "step": 11886 }, { "epoch": 0.9163583102065989, "grad_norm": 3.974215030670166, "learning_rate": 1.823652259569747e-07, "loss": 0.9087, "step": 11887 }, { "epoch": 0.9164353993216158, "grad_norm": 3.9902284145355225, "learning_rate": 1.8203127754808923e-07, "loss": 
0.937, "step": 11888 }, { "epoch": 0.9165124884366328, "grad_norm": 3.7469322681427, "learning_rate": 1.8169762951460112e-07, "loss": 0.8929, "step": 11889 }, { "epoch": 0.9165895775516497, "grad_norm": 3.5326128005981445, "learning_rate": 1.8136428187731037e-07, "loss": 0.8825, "step": 11890 }, { "epoch": 0.9166666666666666, "grad_norm": 3.375762462615967, "learning_rate": 1.810312346570009e-07, "loss": 0.8043, "step": 11891 }, { "epoch": 0.9167437557816837, "grad_norm": 3.9036519527435303, "learning_rate": 1.8069848787443556e-07, "loss": 1.036, "step": 11892 }, { "epoch": 0.9168208448967006, "grad_norm": 3.671597719192505, "learning_rate": 1.8036604155035942e-07, "loss": 0.9171, "step": 11893 }, { "epoch": 0.9168979340117176, "grad_norm": 3.8690693378448486, "learning_rate": 1.8003389570549978e-07, "loss": 0.8524, "step": 11894 }, { "epoch": 0.9169750231267345, "grad_norm": 3.334073543548584, "learning_rate": 1.7970205036056287e-07, "loss": 0.9197, "step": 11895 }, { "epoch": 0.9170521122417514, "grad_norm": 3.826357126235962, "learning_rate": 1.793705055362388e-07, "loss": 0.8555, "step": 11896 }, { "epoch": 0.9171292013567685, "grad_norm": 3.7970800399780273, "learning_rate": 1.7903926125319603e-07, "loss": 0.845, "step": 11897 }, { "epoch": 0.9172062904717854, "grad_norm": 3.9418017864227295, "learning_rate": 1.7870831753208752e-07, "loss": 0.9457, "step": 11898 }, { "epoch": 0.9172833795868024, "grad_norm": 4.235414981842041, "learning_rate": 1.7837767439354503e-07, "loss": 0.9059, "step": 11899 }, { "epoch": 0.9173604687018193, "grad_norm": 3.8422882556915283, "learning_rate": 1.7804733185818378e-07, "loss": 0.852, "step": 11900 }, { "epoch": 0.9174375578168362, "grad_norm": 4.103250026702881, "learning_rate": 1.7771728994659677e-07, "loss": 0.8487, "step": 11901 }, { "epoch": 0.9175146469318533, "grad_norm": 3.8441503047943115, "learning_rate": 1.7738754867936193e-07, "loss": 0.8913, "step": 11902 }, { "epoch": 0.9175917360468702, "grad_norm": 3.94001841545105, "learning_rate": 1.7705810807703616e-07, "loss": 0.9332, "step": 11903 }, { "epoch": 0.9176688251618872, "grad_norm": 3.453479766845703, "learning_rate": 1.7672896816015861e-07, "loss": 0.917, "step": 11904 }, { "epoch": 0.9177459142769041, "grad_norm": 3.9532957077026367, "learning_rate": 1.7640012894925008e-07, "loss": 0.8864, "step": 11905 }, { "epoch": 0.917823003391921, "grad_norm": 3.767510414123535, "learning_rate": 1.7607159046481138e-07, "loss": 0.9912, "step": 11906 }, { "epoch": 0.917900092506938, "grad_norm": 3.960230588912964, "learning_rate": 1.7574335272732445e-07, "loss": 0.8642, "step": 11907 }, { "epoch": 0.917977181621955, "grad_norm": 4.039930820465088, "learning_rate": 1.7541541575725464e-07, "loss": 1.0177, "step": 11908 }, { "epoch": 0.918054270736972, "grad_norm": 4.087274074554443, "learning_rate": 1.7508777957504662e-07, "loss": 0.9013, "step": 11909 }, { "epoch": 0.9181313598519889, "grad_norm": 3.746781826019287, "learning_rate": 1.7476044420112637e-07, "loss": 0.9299, "step": 11910 }, { "epoch": 0.9182084489670058, "grad_norm": 3.875746250152588, "learning_rate": 1.744334096559025e-07, "loss": 0.8717, "step": 11911 }, { "epoch": 0.9182855380820228, "grad_norm": 4.113644123077393, "learning_rate": 1.741066759597626e-07, "loss": 1.0246, "step": 11912 }, { "epoch": 0.9183626271970398, "grad_norm": 3.610800266265869, "learning_rate": 1.7378024313307763e-07, "loss": 0.9421, "step": 11913 }, { "epoch": 0.9184397163120568, "grad_norm": 4.1041107177734375, "learning_rate": 1.7345411119619847e-07, "loss": 
0.8065, "step": 11914 }, { "epoch": 0.9185168054270737, "grad_norm": 3.635951519012451, "learning_rate": 1.7312828016945836e-07, "loss": 0.9826, "step": 11915 }, { "epoch": 0.9185938945420906, "grad_norm": 3.9342377185821533, "learning_rate": 1.728027500731716e-07, "loss": 0.981, "step": 11916 }, { "epoch": 0.9186709836571076, "grad_norm": 3.4716575145721436, "learning_rate": 1.7247752092763247e-07, "loss": 0.9107, "step": 11917 }, { "epoch": 0.9187480727721246, "grad_norm": 3.786710262298584, "learning_rate": 1.7215259275311703e-07, "loss": 0.9527, "step": 11918 }, { "epoch": 0.9188251618871416, "grad_norm": 3.892406940460205, "learning_rate": 1.71827965569884e-07, "loss": 0.7812, "step": 11919 }, { "epoch": 0.9189022510021585, "grad_norm": 3.8469479084014893, "learning_rate": 1.7150363939817117e-07, "loss": 0.919, "step": 11920 }, { "epoch": 0.9189793401171754, "grad_norm": 3.4932711124420166, "learning_rate": 1.7117961425819897e-07, "loss": 0.9501, "step": 11921 }, { "epoch": 0.9190564292321924, "grad_norm": 3.6135263442993164, "learning_rate": 1.70855890170169e-07, "loss": 0.8402, "step": 11922 }, { "epoch": 0.9191335183472094, "grad_norm": 4.184671878814697, "learning_rate": 1.7053246715426297e-07, "loss": 0.8431, "step": 11923 }, { "epoch": 0.9192106074622264, "grad_norm": 3.7733452320098877, "learning_rate": 1.702093452306458e-07, "loss": 0.9064, "step": 11924 }, { "epoch": 0.9192876965772433, "grad_norm": 3.719705820083618, "learning_rate": 1.698865244194614e-07, "loss": 0.8943, "step": 11925 }, { "epoch": 0.9193647856922602, "grad_norm": 3.8758575916290283, "learning_rate": 1.6956400474083644e-07, "loss": 0.9384, "step": 11926 }, { "epoch": 0.9194418748072772, "grad_norm": 3.6898250579833984, "learning_rate": 1.6924178621487875e-07, "loss": 0.8039, "step": 11927 }, { "epoch": 0.9195189639222942, "grad_norm": 3.731034755706787, "learning_rate": 1.689198688616761e-07, "loss": 0.9268, "step": 11928 }, { "epoch": 0.9195960530373112, "grad_norm": 3.5098345279693604, "learning_rate": 1.685982527012986e-07, "loss": 0.8259, "step": 11929 }, { "epoch": 0.9196731421523281, "grad_norm": 3.9864370822906494, "learning_rate": 1.6827693775379794e-07, "loss": 0.8976, "step": 11930 }, { "epoch": 0.919750231267345, "grad_norm": 3.444135904312134, "learning_rate": 1.6795592403920591e-07, "loss": 0.8563, "step": 11931 }, { "epoch": 0.919827320382362, "grad_norm": 3.5715203285217285, "learning_rate": 1.6763521157753647e-07, "loss": 0.8746, "step": 11932 }, { "epoch": 0.919904409497379, "grad_norm": 4.088859558105469, "learning_rate": 1.6731480038878368e-07, "loss": 0.9353, "step": 11933 }, { "epoch": 0.919981498612396, "grad_norm": 3.915494918823242, "learning_rate": 1.6699469049292427e-07, "loss": 0.946, "step": 11934 }, { "epoch": 0.9200585877274129, "grad_norm": 3.923362970352173, "learning_rate": 1.666748819099151e-07, "loss": 0.9018, "step": 11935 }, { "epoch": 0.9201356768424298, "grad_norm": 3.7624075412750244, "learning_rate": 1.6635537465969463e-07, "loss": 0.8156, "step": 11936 }, { "epoch": 0.9202127659574468, "grad_norm": 3.9974863529205322, "learning_rate": 1.6603616876218308e-07, "loss": 0.8917, "step": 11937 }, { "epoch": 0.9202898550724637, "grad_norm": 4.101132392883301, "learning_rate": 1.6571726423727953e-07, "loss": 0.9253, "step": 11938 }, { "epoch": 0.9203669441874808, "grad_norm": 4.09736442565918, "learning_rate": 1.6539866110486802e-07, "loss": 0.9177, "step": 11939 }, { "epoch": 0.9204440333024977, "grad_norm": 3.8575503826141357, "learning_rate": 1.6508035938481048e-07, 
"loss": 0.9692, "step": 11940 }, { "epoch": 0.9205211224175146, "grad_norm": 3.686838150024414, "learning_rate": 1.6476235909695158e-07, "loss": 0.9461, "step": 11941 }, { "epoch": 0.9205982115325316, "grad_norm": 3.914886236190796, "learning_rate": 1.6444466026111826e-07, "loss": 0.9885, "step": 11942 }, { "epoch": 0.9206753006475485, "grad_norm": 4.105428695678711, "learning_rate": 1.6412726289711578e-07, "loss": 0.9431, "step": 11943 }, { "epoch": 0.9207523897625656, "grad_norm": 6.005154132843018, "learning_rate": 1.6381016702473273e-07, "loss": 0.7875, "step": 11944 }, { "epoch": 0.9208294788775825, "grad_norm": 4.297434329986572, "learning_rate": 1.6349337266373832e-07, "loss": 1.0278, "step": 11945 }, { "epoch": 0.9209065679925994, "grad_norm": 3.489044189453125, "learning_rate": 1.631768798338834e-07, "loss": 0.828, "step": 11946 }, { "epoch": 0.9209836571076164, "grad_norm": 3.5091493129730225, "learning_rate": 1.6286068855489946e-07, "loss": 0.804, "step": 11947 }, { "epoch": 0.9210607462226333, "grad_norm": 3.7728817462921143, "learning_rate": 1.625447988465001e-07, "loss": 0.9729, "step": 11948 }, { "epoch": 0.9211378353376504, "grad_norm": 3.835939884185791, "learning_rate": 1.6222921072837794e-07, "loss": 0.9318, "step": 11949 }, { "epoch": 0.9212149244526673, "grad_norm": 3.666374921798706, "learning_rate": 1.6191392422020892e-07, "loss": 0.922, "step": 11950 }, { "epoch": 0.9212920135676842, "grad_norm": 3.756239652633667, "learning_rate": 1.6159893934165006e-07, "loss": 0.9781, "step": 11951 }, { "epoch": 0.9213691026827012, "grad_norm": 3.4061012268066406, "learning_rate": 1.6128425611233844e-07, "loss": 0.7702, "step": 11952 }, { "epoch": 0.9214461917977181, "grad_norm": 3.5357954502105713, "learning_rate": 1.6096987455189338e-07, "loss": 0.8715, "step": 11953 }, { "epoch": 0.9215232809127352, "grad_norm": 4.1103596687316895, "learning_rate": 1.6065579467991422e-07, "loss": 0.8386, "step": 11954 }, { "epoch": 0.9216003700277521, "grad_norm": 3.631218433380127, "learning_rate": 1.6034201651598304e-07, "loss": 0.7605, "step": 11955 }, { "epoch": 0.921677459142769, "grad_norm": 3.968055248260498, "learning_rate": 1.600285400796614e-07, "loss": 1.0534, "step": 11956 }, { "epoch": 0.921754548257786, "grad_norm": 4.089054107666016, "learning_rate": 1.597153653904937e-07, "loss": 0.9012, "step": 11957 }, { "epoch": 0.9218316373728029, "grad_norm": 3.858142375946045, "learning_rate": 1.594024924680043e-07, "loss": 0.8826, "step": 11958 }, { "epoch": 0.92190872648782, "grad_norm": 3.8919715881347656, "learning_rate": 1.5908992133170041e-07, "loss": 0.7672, "step": 11959 }, { "epoch": 0.9219858156028369, "grad_norm": 4.094879150390625, "learning_rate": 1.5877765200106697e-07, "loss": 1.0555, "step": 11960 }, { "epoch": 0.9220629047178538, "grad_norm": 3.7169480323791504, "learning_rate": 1.5846568449557397e-07, "loss": 0.7915, "step": 11961 }, { "epoch": 0.9221399938328708, "grad_norm": 3.7812538146972656, "learning_rate": 1.5815401883467086e-07, "loss": 0.8998, "step": 11962 }, { "epoch": 0.9222170829478877, "grad_norm": 3.6201839447021484, "learning_rate": 1.578426550377876e-07, "loss": 0.8315, "step": 11963 }, { "epoch": 0.9222941720629048, "grad_norm": 3.6739892959594727, "learning_rate": 1.5753159312433762e-07, "loss": 0.9893, "step": 11964 }, { "epoch": 0.9223712611779217, "grad_norm": 3.594050884246826, "learning_rate": 1.5722083311371206e-07, "loss": 0.8761, "step": 11965 }, { "epoch": 0.9224483502929386, "grad_norm": 3.51835298538208, "learning_rate": 
1.569103750252865e-07, "loss": 0.9165, "step": 11966 }, { "epoch": 0.9225254394079556, "grad_norm": 3.550865650177002, "learning_rate": 1.5660021887841603e-07, "loss": 0.8594, "step": 11967 }, { "epoch": 0.9226025285229725, "grad_norm": 3.8896725177764893, "learning_rate": 1.5629036469243686e-07, "loss": 0.8592, "step": 11968 }, { "epoch": 0.9226796176379896, "grad_norm": 3.8969407081604004, "learning_rate": 1.5598081248666742e-07, "loss": 0.8356, "step": 11969 }, { "epoch": 0.9227567067530065, "grad_norm": 3.598137378692627, "learning_rate": 1.5567156228040726e-07, "loss": 0.8937, "step": 11970 }, { "epoch": 0.9228337958680234, "grad_norm": 3.7268011569976807, "learning_rate": 1.553626140929354e-07, "loss": 0.9288, "step": 11971 }, { "epoch": 0.9229108849830404, "grad_norm": 3.700021266937256, "learning_rate": 1.5505396794351312e-07, "loss": 0.8649, "step": 11972 }, { "epoch": 0.9229879740980573, "grad_norm": 3.5971262454986572, "learning_rate": 1.5474562385138337e-07, "loss": 0.8113, "step": 11973 }, { "epoch": 0.9230650632130744, "grad_norm": 5.08428955078125, "learning_rate": 1.5443758183576962e-07, "loss": 0.9799, "step": 11974 }, { "epoch": 0.9231421523280913, "grad_norm": 4.136900424957275, "learning_rate": 1.5412984191587766e-07, "loss": 0.9596, "step": 11975 }, { "epoch": 0.9232192414431082, "grad_norm": 3.8938400745391846, "learning_rate": 1.5382240411089156e-07, "loss": 0.9409, "step": 11976 }, { "epoch": 0.9232963305581252, "grad_norm": 4.196589469909668, "learning_rate": 1.5351526843997988e-07, "loss": 0.96, "step": 11977 }, { "epoch": 0.9233734196731421, "grad_norm": 4.031776428222656, "learning_rate": 1.532084349222912e-07, "loss": 0.9206, "step": 11978 }, { "epoch": 0.9234505087881592, "grad_norm": 3.581867218017578, "learning_rate": 1.5290190357695466e-07, "loss": 0.8353, "step": 11979 }, { "epoch": 0.9235275979031761, "grad_norm": 3.6931517124176025, "learning_rate": 1.5259567442308e-07, "loss": 0.7995, "step": 11980 }, { "epoch": 0.923604687018193, "grad_norm": 3.691195011138916, "learning_rate": 1.5228974747975965e-07, "loss": 0.8052, "step": 11981 }, { "epoch": 0.92368177613321, "grad_norm": 3.95929217338562, "learning_rate": 1.5198412276606622e-07, "loss": 0.9099, "step": 11982 }, { "epoch": 0.9237588652482269, "grad_norm": 3.7074384689331055, "learning_rate": 1.5167880030105497e-07, "loss": 0.8664, "step": 11983 }, { "epoch": 0.923835954363244, "grad_norm": 4.384084224700928, "learning_rate": 1.5137378010376015e-07, "loss": 1.0385, "step": 11984 }, { "epoch": 0.9239130434782609, "grad_norm": 3.809328556060791, "learning_rate": 1.5106906219319871e-07, "loss": 0.8989, "step": 11985 }, { "epoch": 0.9239901325932778, "grad_norm": 3.73268461227417, "learning_rate": 1.5076464658836775e-07, "loss": 0.9453, "step": 11986 }, { "epoch": 0.9240672217082948, "grad_norm": 4.032083988189697, "learning_rate": 1.504605333082465e-07, "loss": 0.9235, "step": 11987 }, { "epoch": 0.9241443108233117, "grad_norm": 3.3622939586639404, "learning_rate": 1.5015672237179424e-07, "loss": 0.8173, "step": 11988 }, { "epoch": 0.9242213999383287, "grad_norm": 3.6310486793518066, "learning_rate": 1.498532137979525e-07, "loss": 0.9421, "step": 11989 }, { "epoch": 0.9242984890533457, "grad_norm": 3.632511615753174, "learning_rate": 1.495500076056433e-07, "loss": 0.8727, "step": 11990 }, { "epoch": 0.9243755781683626, "grad_norm": 4.108543872833252, "learning_rate": 1.4924710381376995e-07, "loss": 0.984, "step": 11991 }, { "epoch": 0.9244526672833796, "grad_norm": 3.7871105670928955, "learning_rate": 
1.4894450244121727e-07, "loss": 0.8612, "step": 11992 }, { "epoch": 0.9245297563983965, "grad_norm": 3.8213963508605957, "learning_rate": 1.486422035068502e-07, "loss": 1.1303, "step": 11993 }, { "epoch": 0.9246068455134135, "grad_norm": 3.63419508934021, "learning_rate": 1.483402070295159e-07, "loss": 0.804, "step": 11994 }, { "epoch": 0.9246839346284305, "grad_norm": 3.4561123847961426, "learning_rate": 1.4803851302804261e-07, "loss": 0.9273, "step": 11995 }, { "epoch": 0.9247610237434474, "grad_norm": 3.9428532123565674, "learning_rate": 1.477371215212392e-07, "loss": 0.9112, "step": 11996 }, { "epoch": 0.9248381128584644, "grad_norm": 3.147937774658203, "learning_rate": 1.474360325278956e-07, "loss": 0.8637, "step": 11997 }, { "epoch": 0.9249152019734813, "grad_norm": 3.6595137119293213, "learning_rate": 1.4713524606678298e-07, "loss": 0.9017, "step": 11998 }, { "epoch": 0.9249922910884983, "grad_norm": 4.111579895019531, "learning_rate": 1.4683476215665405e-07, "loss": 1.0554, "step": 11999 }, { "epoch": 0.9250693802035153, "grad_norm": 3.70733642578125, "learning_rate": 1.465345808162427e-07, "loss": 0.9537, "step": 12000 }, { "epoch": 0.9251464693185322, "grad_norm": 3.986865758895874, "learning_rate": 1.4623470206426404e-07, "loss": 1.0235, "step": 12001 }, { "epoch": 0.9252235584335492, "grad_norm": 3.7944588661193848, "learning_rate": 1.4593512591941304e-07, "loss": 0.8312, "step": 12002 }, { "epoch": 0.9253006475485661, "grad_norm": 3.5245468616485596, "learning_rate": 1.4563585240036705e-07, "loss": 0.8643, "step": 12003 }, { "epoch": 0.9253777366635831, "grad_norm": 3.854820966720581, "learning_rate": 1.4533688152578384e-07, "loss": 0.8276, "step": 12004 }, { "epoch": 0.9254548257786, "grad_norm": 4.025414943695068, "learning_rate": 1.4503821331430358e-07, "loss": 0.859, "step": 12005 }, { "epoch": 0.925531914893617, "grad_norm": 3.724771499633789, "learning_rate": 1.447398477845463e-07, "loss": 0.8583, "step": 12006 }, { "epoch": 0.925609004008634, "grad_norm": 3.5540995597839355, "learning_rate": 1.444417849551133e-07, "loss": 0.871, "step": 12007 }, { "epoch": 0.9256860931236509, "grad_norm": 3.8475286960601807, "learning_rate": 1.4414402484458746e-07, "loss": 0.9248, "step": 12008 }, { "epoch": 0.9257631822386679, "grad_norm": 3.745168924331665, "learning_rate": 1.438465674715328e-07, "loss": 0.9334, "step": 12009 }, { "epoch": 0.9258402713536849, "grad_norm": 4.050620079040527, "learning_rate": 1.4354941285449342e-07, "loss": 0.9622, "step": 12010 }, { "epoch": 0.9259173604687018, "grad_norm": 3.5585291385650635, "learning_rate": 1.4325256101199615e-07, "loss": 0.9217, "step": 12011 }, { "epoch": 0.9259944495837188, "grad_norm": 3.585474729537964, "learning_rate": 1.4295601196254838e-07, "loss": 0.8582, "step": 12012 }, { "epoch": 0.9260715386987357, "grad_norm": 3.88104510307312, "learning_rate": 1.4265976572463815e-07, "loss": 0.9336, "step": 12013 }, { "epoch": 0.9261486278137527, "grad_norm": 3.3760218620300293, "learning_rate": 1.4236382231673395e-07, "loss": 0.8343, "step": 12014 }, { "epoch": 0.9262257169287696, "grad_norm": 3.9959182739257812, "learning_rate": 1.420681817572872e-07, "loss": 0.9329, "step": 12015 }, { "epoch": 0.9263028060437866, "grad_norm": 3.396411895751953, "learning_rate": 1.417728440647298e-07, "loss": 0.7385, "step": 12016 }, { "epoch": 0.9263798951588036, "grad_norm": 3.7224302291870117, "learning_rate": 1.4147780925747368e-07, "loss": 0.8898, "step": 12017 }, { "epoch": 0.9264569842738205, "grad_norm": 3.9120421409606934, "learning_rate": 
1.4118307735391412e-07, "loss": 0.8889, "step": 12018 }, { "epoch": 0.9265340733888375, "grad_norm": 3.755709171295166, "learning_rate": 1.4088864837242422e-07, "loss": 0.9396, "step": 12019 }, { "epoch": 0.9266111625038544, "grad_norm": 3.8816263675689697, "learning_rate": 1.4059452233136094e-07, "loss": 0.8704, "step": 12020 }, { "epoch": 0.9266882516188714, "grad_norm": 3.5586330890655518, "learning_rate": 1.4030069924906241e-07, "loss": 0.8279, "step": 12021 }, { "epoch": 0.9267653407338884, "grad_norm": 3.6975462436676025, "learning_rate": 1.4000717914384677e-07, "loss": 0.9152, "step": 12022 }, { "epoch": 0.9268424298489053, "grad_norm": 3.672626256942749, "learning_rate": 1.397139620340121e-07, "loss": 0.8989, "step": 12023 }, { "epoch": 0.9269195189639223, "grad_norm": 3.6923696994781494, "learning_rate": 1.3942104793783996e-07, "loss": 0.8973, "step": 12024 }, { "epoch": 0.9269966080789392, "grad_norm": 3.7403361797332764, "learning_rate": 1.391284368735918e-07, "loss": 0.8737, "step": 12025 }, { "epoch": 0.9270736971939562, "grad_norm": 4.060670852661133, "learning_rate": 1.388361288595108e-07, "loss": 0.9774, "step": 12026 }, { "epoch": 0.9271507863089732, "grad_norm": 3.923124313354492, "learning_rate": 1.3854412391382078e-07, "loss": 0.9751, "step": 12027 }, { "epoch": 0.9272278754239901, "grad_norm": 3.894160032272339, "learning_rate": 1.3825242205472599e-07, "loss": 0.919, "step": 12028 }, { "epoch": 0.9273049645390071, "grad_norm": 3.898775815963745, "learning_rate": 1.3796102330041305e-07, "loss": 0.9102, "step": 12029 }, { "epoch": 0.927382053654024, "grad_norm": 3.7741925716400146, "learning_rate": 1.3766992766904908e-07, "loss": 0.7993, "step": 12030 }, { "epoch": 0.927459142769041, "grad_norm": 3.740205764770508, "learning_rate": 1.3737913517878286e-07, "loss": 0.9111, "step": 12031 }, { "epoch": 0.927536231884058, "grad_norm": 3.5820446014404297, "learning_rate": 1.370886458477433e-07, "loss": 0.8983, "step": 12032 }, { "epoch": 0.9276133209990749, "grad_norm": 3.634716510772705, "learning_rate": 1.3679845969404138e-07, "loss": 0.7987, "step": 12033 }, { "epoch": 0.9276904101140919, "grad_norm": 3.8393595218658447, "learning_rate": 1.3650857673576767e-07, "loss": 0.8793, "step": 12034 }, { "epoch": 0.9277674992291088, "grad_norm": 3.9487619400024414, "learning_rate": 1.362189969909955e-07, "loss": 0.8779, "step": 12035 }, { "epoch": 0.9278445883441259, "grad_norm": 3.9395010471343994, "learning_rate": 1.3592972047777874e-07, "loss": 0.9886, "step": 12036 }, { "epoch": 0.9279216774591428, "grad_norm": 3.642089366912842, "learning_rate": 1.3564074721415243e-07, "loss": 0.8256, "step": 12037 }, { "epoch": 0.9279987665741597, "grad_norm": 3.643852472305298, "learning_rate": 1.3535207721813327e-07, "loss": 0.8572, "step": 12038 }, { "epoch": 0.9280758556891767, "grad_norm": 3.6606554985046387, "learning_rate": 1.3506371050771626e-07, "loss": 0.9307, "step": 12039 }, { "epoch": 0.9281529448041936, "grad_norm": 3.6610138416290283, "learning_rate": 1.3477564710088097e-07, "loss": 0.8941, "step": 12040 }, { "epoch": 0.9282300339192107, "grad_norm": 3.769508123397827, "learning_rate": 1.3448788701558635e-07, "loss": 0.8769, "step": 12041 }, { "epoch": 0.9283071230342276, "grad_norm": 3.8001794815063477, "learning_rate": 1.3420043026977303e-07, "loss": 0.9133, "step": 12042 }, { "epoch": 0.9283842121492445, "grad_norm": 3.7807233333587646, "learning_rate": 1.3391327688136225e-07, "loss": 0.9274, "step": 12043 }, { "epoch": 0.9284613012642615, "grad_norm": 3.714263677597046, 
"learning_rate": 1.3362642686825688e-07, "loss": 0.8069, "step": 12044 }, { "epoch": 0.9285383903792784, "grad_norm": 3.7317070960998535, "learning_rate": 1.3333988024833989e-07, "loss": 0.9593, "step": 12045 }, { "epoch": 0.9286154794942955, "grad_norm": 3.5440969467163086, "learning_rate": 1.3305363703947581e-07, "loss": 0.8853, "step": 12046 }, { "epoch": 0.9286925686093124, "grad_norm": 3.542323112487793, "learning_rate": 1.3276769725951155e-07, "loss": 0.8715, "step": 12047 }, { "epoch": 0.9287696577243293, "grad_norm": 3.7308967113494873, "learning_rate": 1.3248206092627281e-07, "loss": 0.8932, "step": 12048 }, { "epoch": 0.9288467468393463, "grad_norm": 3.898761034011841, "learning_rate": 1.3219672805756922e-07, "loss": 0.9775, "step": 12049 }, { "epoch": 0.9289238359543632, "grad_norm": 3.488557815551758, "learning_rate": 1.319116986711877e-07, "loss": 0.9247, "step": 12050 }, { "epoch": 0.9290009250693803, "grad_norm": 4.142662048339844, "learning_rate": 1.31626972784899e-07, "loss": 0.9129, "step": 12051 }, { "epoch": 0.9290780141843972, "grad_norm": 3.6546530723571777, "learning_rate": 1.3134255041645505e-07, "loss": 0.8448, "step": 12052 }, { "epoch": 0.9291551032994141, "grad_norm": 3.8783202171325684, "learning_rate": 1.310584315835872e-07, "loss": 0.938, "step": 12053 }, { "epoch": 0.9292321924144311, "grad_norm": 3.5673296451568604, "learning_rate": 1.3077461630400967e-07, "loss": 0.9903, "step": 12054 }, { "epoch": 0.929309281529448, "grad_norm": 3.872450590133667, "learning_rate": 1.3049110459541658e-07, "loss": 0.8605, "step": 12055 }, { "epoch": 0.929386370644465, "grad_norm": 3.5778377056121826, "learning_rate": 1.3020789647548326e-07, "loss": 0.8794, "step": 12056 }, { "epoch": 0.929463459759482, "grad_norm": 3.7152254581451416, "learning_rate": 1.2992499196186614e-07, "loss": 0.8878, "step": 12057 }, { "epoch": 0.9295405488744989, "grad_norm": 3.6128242015838623, "learning_rate": 1.296423910722028e-07, "loss": 0.9107, "step": 12058 }, { "epoch": 0.9296176379895159, "grad_norm": 3.4194366931915283, "learning_rate": 1.293600938241124e-07, "loss": 0.8509, "step": 12059 }, { "epoch": 0.9296947271045328, "grad_norm": 3.806325912475586, "learning_rate": 1.2907810023519485e-07, "loss": 0.8803, "step": 12060 }, { "epoch": 0.9297718162195499, "grad_norm": 3.6678504943847656, "learning_rate": 1.2879641032302993e-07, "loss": 0.9138, "step": 12061 }, { "epoch": 0.9298489053345668, "grad_norm": 3.877365827560425, "learning_rate": 1.2851502410518025e-07, "loss": 0.9103, "step": 12062 }, { "epoch": 0.9299259944495837, "grad_norm": 3.8869688510894775, "learning_rate": 1.28233941599189e-07, "loss": 0.9129, "step": 12063 }, { "epoch": 0.9300030835646007, "grad_norm": 4.313940525054932, "learning_rate": 1.279531628225794e-07, "loss": 0.8989, "step": 12064 }, { "epoch": 0.9300801726796176, "grad_norm": 3.7056541442871094, "learning_rate": 1.2767268779285803e-07, "loss": 0.8635, "step": 12065 }, { "epoch": 0.9301572617946346, "grad_norm": 3.957458019256592, "learning_rate": 1.2739251652750917e-07, "loss": 0.8531, "step": 12066 }, { "epoch": 0.9302343509096516, "grad_norm": 4.063552379608154, "learning_rate": 1.2711264904400167e-07, "loss": 1.0151, "step": 12067 }, { "epoch": 0.9303114400246685, "grad_norm": 4.064858436584473, "learning_rate": 1.2683308535978323e-07, "loss": 0.9269, "step": 12068 }, { "epoch": 0.9303885291396855, "grad_norm": 3.7394871711730957, "learning_rate": 1.2655382549228267e-07, "loss": 0.8913, "step": 12069 }, { "epoch": 0.9304656182547024, "grad_norm": 
3.8202192783355713, "learning_rate": 1.2627486945891166e-07, "loss": 0.9727, "step": 12070 }, { "epoch": 0.9305427073697194, "grad_norm": 3.5857367515563965, "learning_rate": 1.2599621727706014e-07, "loss": 0.9116, "step": 12071 }, { "epoch": 0.9306197964847364, "grad_norm": 3.656193494796753, "learning_rate": 1.2571786896410144e-07, "loss": 0.8853, "step": 12072 }, { "epoch": 0.9306968855997533, "grad_norm": 3.4861977100372314, "learning_rate": 1.2543982453738945e-07, "loss": 0.8542, "step": 12073 }, { "epoch": 0.9307739747147703, "grad_norm": 3.6881868839263916, "learning_rate": 1.251620840142581e-07, "loss": 0.9306, "step": 12074 }, { "epoch": 0.9308510638297872, "grad_norm": 4.058561325073242, "learning_rate": 1.2488464741202355e-07, "loss": 0.9352, "step": 12075 }, { "epoch": 0.9309281529448042, "grad_norm": 4.049665927886963, "learning_rate": 1.2460751474798249e-07, "loss": 1.0101, "step": 12076 }, { "epoch": 0.9310052420598212, "grad_norm": 3.6814520359039307, "learning_rate": 1.2433068603941223e-07, "loss": 0.881, "step": 12077 }, { "epoch": 0.9310823311748381, "grad_norm": 4.561155796051025, "learning_rate": 1.2405416130357172e-07, "loss": 0.9776, "step": 12078 }, { "epoch": 0.9311594202898551, "grad_norm": 4.181887149810791, "learning_rate": 1.2377794055770164e-07, "loss": 0.8847, "step": 12079 }, { "epoch": 0.931236509404872, "grad_norm": 3.861226797103882, "learning_rate": 1.235020238190221e-07, "loss": 1.0193, "step": 12080 }, { "epoch": 0.931313598519889, "grad_norm": 3.324911594390869, "learning_rate": 1.23226411104736e-07, "loss": 0.8281, "step": 12081 }, { "epoch": 0.931390687634906, "grad_norm": 3.70337176322937, "learning_rate": 1.2295110243202458e-07, "loss": 0.9571, "step": 12082 }, { "epoch": 0.9314677767499229, "grad_norm": 3.8327345848083496, "learning_rate": 1.2267609781805356e-07, "loss": 0.8885, "step": 12083 }, { "epoch": 0.9315448658649399, "grad_norm": 3.6880276203155518, "learning_rate": 1.2240139727996757e-07, "loss": 0.8384, "step": 12084 }, { "epoch": 0.9316219549799568, "grad_norm": 3.634875774383545, "learning_rate": 1.2212700083489236e-07, "loss": 0.9202, "step": 12085 }, { "epoch": 0.9316990440949738, "grad_norm": 3.5088517665863037, "learning_rate": 1.2185290849993648e-07, "loss": 0.8589, "step": 12086 }, { "epoch": 0.9317761332099908, "grad_norm": 3.7651207447052, "learning_rate": 1.2157912029218676e-07, "loss": 0.899, "step": 12087 }, { "epoch": 0.9318532223250077, "grad_norm": 3.844053030014038, "learning_rate": 1.2130563622871239e-07, "loss": 0.913, "step": 12088 }, { "epoch": 0.9319303114400247, "grad_norm": 3.8823323249816895, "learning_rate": 1.2103245632656414e-07, "loss": 0.9003, "step": 12089 }, { "epoch": 0.9320074005550416, "grad_norm": 3.5348687171936035, "learning_rate": 1.2075958060277394e-07, "loss": 0.8039, "step": 12090 }, { "epoch": 0.9320844896700586, "grad_norm": 3.1809444427490234, "learning_rate": 1.2048700907435318e-07, "loss": 0.7699, "step": 12091 }, { "epoch": 0.9321615787850756, "grad_norm": 3.506540298461914, "learning_rate": 1.2021474175829662e-07, "loss": 0.9134, "step": 12092 }, { "epoch": 0.9322386679000925, "grad_norm": 3.255476236343384, "learning_rate": 1.1994277867157734e-07, "loss": 0.8231, "step": 12093 }, { "epoch": 0.9323157570151095, "grad_norm": 3.484185218811035, "learning_rate": 1.196711198311512e-07, "loss": 0.8325, "step": 12094 }, { "epoch": 0.9323928461301264, "grad_norm": 4.07226037979126, "learning_rate": 1.1939976525395468e-07, "loss": 0.9041, "step": 12095 }, { "epoch": 0.9324699352451434, 
"grad_norm": 3.9831414222717285, "learning_rate": 1.1912871495690592e-07, "loss": 0.9138, "step": 12096 }, { "epoch": 0.9325470243601603, "grad_norm": 3.949620246887207, "learning_rate": 1.1885796895690304e-07, "loss": 1.0008, "step": 12097 }, { "epoch": 0.9326241134751773, "grad_norm": 3.8057408332824707, "learning_rate": 1.185875272708259e-07, "loss": 0.9066, "step": 12098 }, { "epoch": 0.9327012025901943, "grad_norm": 4.090064525604248, "learning_rate": 1.1831738991553432e-07, "loss": 0.9784, "step": 12099 }, { "epoch": 0.9327782917052112, "grad_norm": 4.04171895980835, "learning_rate": 1.1804755690787095e-07, "loss": 0.98, "step": 12100 }, { "epoch": 0.9328553808202282, "grad_norm": 3.672153949737549, "learning_rate": 1.177780282646579e-07, "loss": 0.8458, "step": 12101 }, { "epoch": 0.9329324699352451, "grad_norm": 3.4648172855377197, "learning_rate": 1.1750880400269948e-07, "loss": 0.8031, "step": 12102 }, { "epoch": 0.9330095590502621, "grad_norm": 3.73508882522583, "learning_rate": 1.1723988413878063e-07, "loss": 0.9876, "step": 12103 }, { "epoch": 0.9330866481652791, "grad_norm": 3.7413759231567383, "learning_rate": 1.1697126868966569e-07, "loss": 0.8011, "step": 12104 }, { "epoch": 0.933163737280296, "grad_norm": 3.7317357063293457, "learning_rate": 1.1670295767210238e-07, "loss": 0.8842, "step": 12105 }, { "epoch": 0.933240826395313, "grad_norm": 3.6849234104156494, "learning_rate": 1.1643495110281844e-07, "loss": 0.9667, "step": 12106 }, { "epoch": 0.9333179155103299, "grad_norm": 3.7106947898864746, "learning_rate": 1.1616724899852217e-07, "loss": 0.8664, "step": 12107 }, { "epoch": 0.9333950046253469, "grad_norm": 3.798051595687866, "learning_rate": 1.158998513759052e-07, "loss": 0.8214, "step": 12108 }, { "epoch": 0.9334720937403639, "grad_norm": 4.954608917236328, "learning_rate": 1.156327582516359e-07, "loss": 0.9682, "step": 12109 }, { "epoch": 0.9335491828553808, "grad_norm": 3.375283718109131, "learning_rate": 1.1536596964236757e-07, "loss": 0.7497, "step": 12110 }, { "epoch": 0.9336262719703978, "grad_norm": 3.573298931121826, "learning_rate": 1.1509948556473306e-07, "loss": 0.8748, "step": 12111 }, { "epoch": 0.9337033610854147, "grad_norm": 3.768425464630127, "learning_rate": 1.1483330603534625e-07, "loss": 0.8943, "step": 12112 }, { "epoch": 0.9337804502004317, "grad_norm": 3.941859245300293, "learning_rate": 1.1456743107080171e-07, "loss": 0.9499, "step": 12113 }, { "epoch": 0.9338575393154487, "grad_norm": 3.9781956672668457, "learning_rate": 1.1430186068767557e-07, "loss": 0.8871, "step": 12114 }, { "epoch": 0.9339346284304656, "grad_norm": 3.6682260036468506, "learning_rate": 1.1403659490252462e-07, "loss": 0.8444, "step": 12115 }, { "epoch": 0.9340117175454826, "grad_norm": 3.898146629333496, "learning_rate": 1.137716337318867e-07, "loss": 0.9247, "step": 12116 }, { "epoch": 0.9340888066604995, "grad_norm": 4.3249335289001465, "learning_rate": 1.1350697719228144e-07, "loss": 0.9518, "step": 12117 }, { "epoch": 0.9341658957755165, "grad_norm": 3.901083469390869, "learning_rate": 1.1324262530020835e-07, "loss": 0.9795, "step": 12118 }, { "epoch": 0.9342429848905335, "grad_norm": 3.6279914379119873, "learning_rate": 1.1297857807214818e-07, "loss": 0.8613, "step": 12119 }, { "epoch": 0.9343200740055504, "grad_norm": 3.867549180984497, "learning_rate": 1.127148355245633e-07, "loss": 0.8051, "step": 12120 }, { "epoch": 0.9343971631205674, "grad_norm": 3.700650453567505, "learning_rate": 1.1245139767389612e-07, "loss": 0.8942, "step": 12121 }, { "epoch": 
0.9344742522355843, "grad_norm": 3.632929563522339, "learning_rate": 1.1218826453657127e-07, "loss": 0.8625, "step": 12122 }, { "epoch": 0.9345513413506013, "grad_norm": 3.861298084259033, "learning_rate": 1.1192543612899398e-07, "loss": 0.9062, "step": 12123 }, { "epoch": 0.9346284304656183, "grad_norm": 3.5877022743225098, "learning_rate": 1.1166291246754945e-07, "loss": 0.8753, "step": 12124 }, { "epoch": 0.9347055195806352, "grad_norm": 3.9185619354248047, "learning_rate": 1.114006935686046e-07, "loss": 0.8794, "step": 12125 }, { "epoch": 0.9347826086956522, "grad_norm": 3.9409501552581787, "learning_rate": 1.1113877944850804e-07, "loss": 1.0012, "step": 12126 }, { "epoch": 0.9348596978106691, "grad_norm": 3.9709725379943848, "learning_rate": 1.1087717012358834e-07, "loss": 1.0408, "step": 12127 }, { "epoch": 0.934936786925686, "grad_norm": 3.867388963699341, "learning_rate": 1.1061586561015636e-07, "loss": 0.9123, "step": 12128 }, { "epoch": 0.9350138760407031, "grad_norm": 3.8618171215057373, "learning_rate": 1.1035486592450184e-07, "loss": 0.8511, "step": 12129 }, { "epoch": 0.93509096515572, "grad_norm": 3.811495542526245, "learning_rate": 1.1009417108289733e-07, "loss": 0.8261, "step": 12130 }, { "epoch": 0.935168054270737, "grad_norm": 4.648553848266602, "learning_rate": 1.0983378110159593e-07, "loss": 0.9152, "step": 12131 }, { "epoch": 0.9352451433857539, "grad_norm": 3.401317834854126, "learning_rate": 1.0957369599683132e-07, "loss": 0.9336, "step": 12132 }, { "epoch": 0.9353222325007708, "grad_norm": 4.083096504211426, "learning_rate": 1.0931391578481832e-07, "loss": 0.9892, "step": 12133 }, { "epoch": 0.9353993216157879, "grad_norm": 3.8430960178375244, "learning_rate": 1.0905444048175396e-07, "loss": 0.8581, "step": 12134 }, { "epoch": 0.9354764107308048, "grad_norm": 3.859454393386841, "learning_rate": 1.0879527010381419e-07, "loss": 0.9705, "step": 12135 }, { "epoch": 0.9355534998458218, "grad_norm": 4.2190375328063965, "learning_rate": 1.0853640466715664e-07, "loss": 0.8858, "step": 12136 }, { "epoch": 0.9356305889608387, "grad_norm": 3.7572779655456543, "learning_rate": 1.0827784418792064e-07, "loss": 0.9528, "step": 12137 }, { "epoch": 0.9357076780758556, "grad_norm": 3.6124427318573, "learning_rate": 1.0801958868222662e-07, "loss": 0.8229, "step": 12138 }, { "epoch": 0.9357847671908727, "grad_norm": 4.138119220733643, "learning_rate": 1.0776163816617446e-07, "loss": 0.9593, "step": 12139 }, { "epoch": 0.9358618563058896, "grad_norm": 3.941408634185791, "learning_rate": 1.0750399265584744e-07, "loss": 0.8894, "step": 12140 }, { "epoch": 0.9359389454209066, "grad_norm": 3.9744157791137695, "learning_rate": 1.0724665216730711e-07, "loss": 0.9538, "step": 12141 }, { "epoch": 0.9360160345359235, "grad_norm": 3.580399513244629, "learning_rate": 1.0698961671659791e-07, "loss": 0.8409, "step": 12142 }, { "epoch": 0.9360931236509404, "grad_norm": 3.7937207221984863, "learning_rate": 1.0673288631974421e-07, "loss": 1.0333, "step": 12143 }, { "epoch": 0.9361702127659575, "grad_norm": 3.6252706050872803, "learning_rate": 1.0647646099275267e-07, "loss": 0.8437, "step": 12144 }, { "epoch": 0.9362473018809744, "grad_norm": 4.086170673370361, "learning_rate": 1.0622034075160936e-07, "loss": 0.8064, "step": 12145 }, { "epoch": 0.9363243909959914, "grad_norm": 3.6738433837890625, "learning_rate": 1.059645256122821e-07, "loss": 0.9238, "step": 12146 }, { "epoch": 0.9364014801110083, "grad_norm": 4.102461338043213, "learning_rate": 1.0570901559072033e-07, "loss": 0.9676, "step": 12147 }, 
{ "epoch": 0.9364785692260252, "grad_norm": 4.378781318664551, "learning_rate": 1.0545381070285243e-07, "loss": 0.8863, "step": 12148 }, { "epoch": 0.9365556583410423, "grad_norm": 3.481828212738037, "learning_rate": 1.0519891096459067e-07, "loss": 0.827, "step": 12149 }, { "epoch": 0.9366327474560592, "grad_norm": 3.6842973232269287, "learning_rate": 1.0494431639182567e-07, "loss": 0.8607, "step": 12150 }, { "epoch": 0.9367098365710762, "grad_norm": 3.917614221572876, "learning_rate": 1.0469002700043029e-07, "loss": 0.8737, "step": 12151 }, { "epoch": 0.9367869256860931, "grad_norm": 3.5234785079956055, "learning_rate": 1.0443604280625852e-07, "loss": 0.8082, "step": 12152 }, { "epoch": 0.93686401480111, "grad_norm": 3.9610044956207275, "learning_rate": 1.0418236382514435e-07, "loss": 0.9952, "step": 12153 }, { "epoch": 0.9369411039161271, "grad_norm": 3.7560231685638428, "learning_rate": 1.0392899007290347e-07, "loss": 0.8605, "step": 12154 }, { "epoch": 0.937018193031144, "grad_norm": 3.415227174758911, "learning_rate": 1.036759215653338e-07, "loss": 0.8353, "step": 12155 }, { "epoch": 0.937095282146161, "grad_norm": 3.7259321212768555, "learning_rate": 1.0342315831821104e-07, "loss": 0.9268, "step": 12156 }, { "epoch": 0.9371723712611779, "grad_norm": 3.7746005058288574, "learning_rate": 1.0317070034729426e-07, "loss": 0.8681, "step": 12157 }, { "epoch": 0.9372494603761948, "grad_norm": 3.6794521808624268, "learning_rate": 1.0291854766832254e-07, "loss": 0.9124, "step": 12158 }, { "epoch": 0.9373265494912119, "grad_norm": 3.675534725189209, "learning_rate": 1.026667002970172e-07, "loss": 0.9868, "step": 12159 }, { "epoch": 0.9374036386062288, "grad_norm": 3.834210157394409, "learning_rate": 1.0241515824907955e-07, "loss": 0.8566, "step": 12160 }, { "epoch": 0.9374807277212458, "grad_norm": 3.795119285583496, "learning_rate": 1.021639215401915e-07, "loss": 0.9076, "step": 12161 }, { "epoch": 0.9375578168362627, "grad_norm": 3.7029600143432617, "learning_rate": 1.0191299018601608e-07, "loss": 0.8642, "step": 12162 }, { "epoch": 0.9376349059512796, "grad_norm": 3.834038734436035, "learning_rate": 1.0166236420219744e-07, "loss": 0.9159, "step": 12163 }, { "epoch": 0.9377119950662967, "grad_norm": 3.733107328414917, "learning_rate": 1.0141204360436197e-07, "loss": 0.8474, "step": 12164 }, { "epoch": 0.9377890841813136, "grad_norm": 3.8652164936065674, "learning_rate": 1.011620284081144e-07, "loss": 0.9073, "step": 12165 }, { "epoch": 0.9378661732963306, "grad_norm": 4.108710289001465, "learning_rate": 1.0091231862904394e-07, "loss": 0.8865, "step": 12166 }, { "epoch": 0.9379432624113475, "grad_norm": 3.595679998397827, "learning_rate": 1.0066291428271646e-07, "loss": 0.9258, "step": 12167 }, { "epoch": 0.9380203515263644, "grad_norm": 3.6943235397338867, "learning_rate": 1.0041381538468175e-07, "loss": 0.863, "step": 12168 }, { "epoch": 0.9380974406413815, "grad_norm": 3.979586124420166, "learning_rate": 1.0016502195047017e-07, "loss": 0.8683, "step": 12169 }, { "epoch": 0.9381745297563984, "grad_norm": 3.542391777038574, "learning_rate": 9.991653399559265e-08, "loss": 0.8578, "step": 12170 }, { "epoch": 0.9382516188714154, "grad_norm": 3.59480357170105, "learning_rate": 9.966835153554177e-08, "loss": 0.9473, "step": 12171 }, { "epoch": 0.9383287079864323, "grad_norm": 3.7652363777160645, "learning_rate": 9.942047458578852e-08, "loss": 0.9378, "step": 12172 }, { "epoch": 0.9384057971014492, "grad_norm": 3.717331886291504, "learning_rate": 9.917290316178884e-08, "loss": 0.8719, "step": 12173 
}, { "epoch": 0.9384828862164662, "grad_norm": 4.402118682861328, "learning_rate": 9.892563727897597e-08, "loss": 0.9718, "step": 12174 }, { "epoch": 0.9385599753314832, "grad_norm": 3.7598085403442383, "learning_rate": 9.867867695276645e-08, "loss": 0.8839, "step": 12175 }, { "epoch": 0.9386370644465002, "grad_norm": 3.4538068771362305, "learning_rate": 9.843202219855685e-08, "loss": 0.8984, "step": 12176 }, { "epoch": 0.9387141535615171, "grad_norm": 3.9770476818084717, "learning_rate": 9.818567303172544e-08, "loss": 0.8237, "step": 12177 }, { "epoch": 0.938791242676534, "grad_norm": 3.9193336963653564, "learning_rate": 9.793962946762936e-08, "loss": 0.9098, "step": 12178 }, { "epoch": 0.938868331791551, "grad_norm": 3.7343082427978516, "learning_rate": 9.769389152160913e-08, "loss": 0.9654, "step": 12179 }, { "epoch": 0.938945420906568, "grad_norm": 3.766138792037964, "learning_rate": 9.744845920898527e-08, "loss": 0.9089, "step": 12180 }, { "epoch": 0.939022510021585, "grad_norm": 4.211320877075195, "learning_rate": 9.720333254505887e-08, "loss": 0.9806, "step": 12181 }, { "epoch": 0.9390995991366019, "grad_norm": 4.284248352050781, "learning_rate": 9.695851154511271e-08, "loss": 1.0172, "step": 12182 }, { "epoch": 0.9391766882516188, "grad_norm": 3.9053781032562256, "learning_rate": 9.671399622440957e-08, "loss": 0.8795, "step": 12183 }, { "epoch": 0.9392537773666358, "grad_norm": 3.633023262023926, "learning_rate": 9.646978659819394e-08, "loss": 0.8426, "step": 12184 }, { "epoch": 0.9393308664816528, "grad_norm": 4.009373188018799, "learning_rate": 9.622588268169141e-08, "loss": 0.8787, "step": 12185 }, { "epoch": 0.9394079555966698, "grad_norm": 3.9296786785125732, "learning_rate": 9.598228449010704e-08, "loss": 1.0206, "step": 12186 }, { "epoch": 0.9394850447116867, "grad_norm": 3.5266220569610596, "learning_rate": 9.573899203862925e-08, "loss": 0.925, "step": 12187 }, { "epoch": 0.9395621338267036, "grad_norm": 3.396301746368408, "learning_rate": 9.549600534242587e-08, "loss": 0.831, "step": 12188 }, { "epoch": 0.9396392229417206, "grad_norm": 3.696995735168457, "learning_rate": 9.525332441664481e-08, "loss": 0.8887, "step": 12189 }, { "epoch": 0.9397163120567376, "grad_norm": 3.7658541202545166, "learning_rate": 9.50109492764173e-08, "loss": 0.9345, "step": 12190 }, { "epoch": 0.9397934011717546, "grad_norm": 3.884662389755249, "learning_rate": 9.476887993685291e-08, "loss": 0.9221, "step": 12191 }, { "epoch": 0.9398704902867715, "grad_norm": 3.74465274810791, "learning_rate": 9.452711641304402e-08, "loss": 0.889, "step": 12192 }, { "epoch": 0.9399475794017884, "grad_norm": 3.8537025451660156, "learning_rate": 9.428565872006356e-08, "loss": 1.0205, "step": 12193 }, { "epoch": 0.9400246685168054, "grad_norm": 3.9346094131469727, "learning_rate": 9.404450687296507e-08, "loss": 0.835, "step": 12194 }, { "epoch": 0.9401017576318224, "grad_norm": 3.567856550216675, "learning_rate": 9.380366088678261e-08, "loss": 0.8092, "step": 12195 }, { "epoch": 0.9401788467468394, "grad_norm": 3.484372854232788, "learning_rate": 9.356312077653196e-08, "loss": 0.9011, "step": 12196 }, { "epoch": 0.9402559358618563, "grad_norm": 4.030934810638428, "learning_rate": 9.332288655720945e-08, "loss": 0.9122, "step": 12197 }, { "epoch": 0.9403330249768732, "grad_norm": 5.114222526550293, "learning_rate": 9.308295824379365e-08, "loss": 0.9911, "step": 12198 }, { "epoch": 0.9404101140918902, "grad_norm": 3.6418612003326416, "learning_rate": 9.284333585124094e-08, "loss": 0.9481, "step": 12199 }, { "epoch": 
0.9404872032069072, "grad_norm": 3.4810874462127686, "learning_rate": 9.260401939449215e-08, "loss": 0.9044, "step": 12200 }, { "epoch": 0.9405642923219242, "grad_norm": 3.6137795448303223, "learning_rate": 9.236500888846589e-08, "loss": 0.7977, "step": 12201 }, { "epoch": 0.9406413814369411, "grad_norm": 3.7133419513702393, "learning_rate": 9.212630434806413e-08, "loss": 0.9542, "step": 12202 }, { "epoch": 0.940718470551958, "grad_norm": 3.7179129123687744, "learning_rate": 9.188790578816942e-08, "loss": 0.8459, "step": 12203 }, { "epoch": 0.940795559666975, "grad_norm": 3.615569591522217, "learning_rate": 9.164981322364375e-08, "loss": 0.8417, "step": 12204 }, { "epoch": 0.940872648781992, "grad_norm": 3.824866533279419, "learning_rate": 9.141202666933135e-08, "loss": 1.0083, "step": 12205 }, { "epoch": 0.940949737897009, "grad_norm": 3.547849655151367, "learning_rate": 9.11745461400565e-08, "loss": 0.8361, "step": 12206 }, { "epoch": 0.9410268270120259, "grad_norm": 3.908607244491577, "learning_rate": 9.09373716506251e-08, "loss": 0.8185, "step": 12207 }, { "epoch": 0.9411039161270429, "grad_norm": 3.6814157962799072, "learning_rate": 9.07005032158248e-08, "loss": 0.8893, "step": 12208 }, { "epoch": 0.9411810052420598, "grad_norm": 3.7791712284088135, "learning_rate": 9.046394085042154e-08, "loss": 0.9866, "step": 12209 }, { "epoch": 0.9412580943570767, "grad_norm": 3.7115676403045654, "learning_rate": 9.022768456916409e-08, "loss": 1.0297, "step": 12210 }, { "epoch": 0.9413351834720938, "grad_norm": 3.7487754821777344, "learning_rate": 8.999173438678233e-08, "loss": 0.9212, "step": 12211 }, { "epoch": 0.9414122725871107, "grad_norm": 3.4741318225860596, "learning_rate": 8.975609031798671e-08, "loss": 0.9159, "step": 12212 }, { "epoch": 0.9414893617021277, "grad_norm": 3.7162187099456787, "learning_rate": 8.952075237746771e-08, "loss": 0.939, "step": 12213 }, { "epoch": 0.9415664508171446, "grad_norm": 3.8283746242523193, "learning_rate": 8.928572057989804e-08, "loss": 0.8525, "step": 12214 }, { "epoch": 0.9416435399321615, "grad_norm": 3.759662628173828, "learning_rate": 8.905099493993041e-08, "loss": 0.879, "step": 12215 }, { "epoch": 0.9417206290471786, "grad_norm": 3.5151100158691406, "learning_rate": 8.881657547219869e-08, "loss": 0.8647, "step": 12216 }, { "epoch": 0.9417977181621955, "grad_norm": 4.150439739227295, "learning_rate": 8.858246219131784e-08, "loss": 0.9453, "step": 12217 }, { "epoch": 0.9418748072772125, "grad_norm": 3.5774085521698, "learning_rate": 8.83486551118834e-08, "loss": 0.8436, "step": 12218 }, { "epoch": 0.9419518963922294, "grad_norm": 3.561293601989746, "learning_rate": 8.811515424847261e-08, "loss": 0.7775, "step": 12219 }, { "epoch": 0.9420289855072463, "grad_norm": 4.2305827140808105, "learning_rate": 8.788195961564273e-08, "loss": 0.9484, "step": 12220 }, { "epoch": 0.9421060746222634, "grad_norm": 3.9733986854553223, "learning_rate": 8.764907122793154e-08, "loss": 0.8226, "step": 12221 }, { "epoch": 0.9421831637372803, "grad_norm": 3.7328057289123535, "learning_rate": 8.741648909985967e-08, "loss": 1.0214, "step": 12222 }, { "epoch": 0.9422602528522973, "grad_norm": 3.4936575889587402, "learning_rate": 8.718421324592608e-08, "loss": 0.9083, "step": 12223 }, { "epoch": 0.9423373419673142, "grad_norm": 3.8838651180267334, "learning_rate": 8.695224368061305e-08, "loss": 0.922, "step": 12224 }, { "epoch": 0.9424144310823311, "grad_norm": 3.990166664123535, "learning_rate": 8.672058041838294e-08, "loss": 0.8594, "step": 12225 }, { "epoch": 
0.9424915201973482, "grad_norm": 3.8123323917388916, "learning_rate": 8.64892234736775e-08, "loss": 0.8615, "step": 12226 }, { "epoch": 0.9425686093123651, "grad_norm": 3.807555675506592, "learning_rate": 8.625817286092075e-08, "loss": 0.8749, "step": 12227 }, { "epoch": 0.9426456984273821, "grad_norm": 3.557279586791992, "learning_rate": 8.602742859451841e-08, "loss": 0.8594, "step": 12228 }, { "epoch": 0.942722787542399, "grad_norm": 4.088743209838867, "learning_rate": 8.579699068885616e-08, "loss": 0.9473, "step": 12229 }, { "epoch": 0.9427998766574159, "grad_norm": 3.422257661819458, "learning_rate": 8.556685915830033e-08, "loss": 0.8581, "step": 12230 }, { "epoch": 0.942876965772433, "grad_norm": 3.8169891834259033, "learning_rate": 8.533703401719773e-08, "loss": 0.9015, "step": 12231 }, { "epoch": 0.9429540548874499, "grad_norm": 3.840945243835449, "learning_rate": 8.510751527987748e-08, "loss": 0.9821, "step": 12232 }, { "epoch": 0.9430311440024669, "grad_norm": 3.5995681285858154, "learning_rate": 8.487830296064869e-08, "loss": 0.9348, "step": 12233 }, { "epoch": 0.9431082331174838, "grad_norm": 3.9970502853393555, "learning_rate": 8.46493970738016e-08, "loss": 0.9943, "step": 12234 }, { "epoch": 0.9431853222325007, "grad_norm": 3.852111339569092, "learning_rate": 8.442079763360755e-08, "loss": 0.9228, "step": 12235 }, { "epoch": 0.9432624113475178, "grad_norm": 3.759093761444092, "learning_rate": 8.419250465431905e-08, "loss": 0.8915, "step": 12236 }, { "epoch": 0.9433395004625347, "grad_norm": 3.7389092445373535, "learning_rate": 8.396451815016749e-08, "loss": 0.8838, "step": 12237 }, { "epoch": 0.9434165895775517, "grad_norm": 3.6164088249206543, "learning_rate": 8.373683813536703e-08, "loss": 0.8513, "step": 12238 }, { "epoch": 0.9434936786925686, "grad_norm": 3.7332277297973633, "learning_rate": 8.350946462411303e-08, "loss": 0.9333, "step": 12239 }, { "epoch": 0.9435707678075855, "grad_norm": 3.6654908657073975, "learning_rate": 8.328239763058077e-08, "loss": 0.8103, "step": 12240 }, { "epoch": 0.9436478569226026, "grad_norm": 3.48644757270813, "learning_rate": 8.305563716892728e-08, "loss": 0.9154, "step": 12241 }, { "epoch": 0.9437249460376195, "grad_norm": 3.515536308288574, "learning_rate": 8.282918325328848e-08, "loss": 0.9073, "step": 12242 }, { "epoch": 0.9438020351526365, "grad_norm": 3.6415224075317383, "learning_rate": 8.260303589778362e-08, "loss": 0.849, "step": 12243 }, { "epoch": 0.9438791242676534, "grad_norm": 3.827643394470215, "learning_rate": 8.237719511651199e-08, "loss": 0.8901, "step": 12244 }, { "epoch": 0.9439562133826703, "grad_norm": 4.021296501159668, "learning_rate": 8.215166092355286e-08, "loss": 1.0162, "step": 12245 }, { "epoch": 0.9440333024976874, "grad_norm": 3.674678325653076, "learning_rate": 8.192643333296779e-08, "loss": 0.8896, "step": 12246 }, { "epoch": 0.9441103916127043, "grad_norm": 3.961991310119629, "learning_rate": 8.17015123587983e-08, "loss": 0.9414, "step": 12247 }, { "epoch": 0.9441874807277213, "grad_norm": 3.9013025760650635, "learning_rate": 8.147689801506653e-08, "loss": 0.9172, "step": 12248 }, { "epoch": 0.9442645698427382, "grad_norm": 3.474181890487671, "learning_rate": 8.125259031577681e-08, "loss": 0.8584, "step": 12249 }, { "epoch": 0.9443416589577551, "grad_norm": 4.0712103843688965, "learning_rate": 8.102858927491297e-08, "loss": 0.9646, "step": 12250 }, { "epoch": 0.9444187480727722, "grad_norm": 3.6317129135131836, "learning_rate": 8.080489490644106e-08, "loss": 0.825, "step": 12251 }, { "epoch": 
0.9444958371877891, "grad_norm": 3.92905855178833, "learning_rate": 8.058150722430658e-08, "loss": 0.9223, "step": 12252 }, { "epoch": 0.9445729263028061, "grad_norm": 4.44641637802124, "learning_rate": 8.035842624243673e-08, "loss": 0.9286, "step": 12253 }, { "epoch": 0.944650015417823, "grad_norm": 3.717348337173462, "learning_rate": 8.013565197473927e-08, "loss": 0.8958, "step": 12254 }, { "epoch": 0.9447271045328399, "grad_norm": 3.6619021892547607, "learning_rate": 7.991318443510365e-08, "loss": 0.873, "step": 12255 }, { "epoch": 0.944804193647857, "grad_norm": 3.7299957275390625, "learning_rate": 7.969102363739933e-08, "loss": 0.9392, "step": 12256 }, { "epoch": 0.9448812827628739, "grad_norm": 3.6785383224487305, "learning_rate": 7.946916959547635e-08, "loss": 0.8582, "step": 12257 }, { "epoch": 0.9449583718778909, "grad_norm": 3.580245018005371, "learning_rate": 7.92476223231664e-08, "loss": 0.8853, "step": 12258 }, { "epoch": 0.9450354609929078, "grad_norm": 3.7513821125030518, "learning_rate": 7.902638183428235e-08, "loss": 0.9091, "step": 12259 }, { "epoch": 0.9451125501079247, "grad_norm": 3.6423444747924805, "learning_rate": 7.880544814261704e-08, "loss": 0.9026, "step": 12260 }, { "epoch": 0.9451896392229417, "grad_norm": 3.6739706993103027, "learning_rate": 7.858482126194445e-08, "loss": 0.8349, "step": 12261 }, { "epoch": 0.9452667283379587, "grad_norm": 4.1562628746032715, "learning_rate": 7.836450120601968e-08, "loss": 0.8982, "step": 12262 }, { "epoch": 0.9453438174529757, "grad_norm": 3.613546133041382, "learning_rate": 7.814448798857843e-08, "loss": 0.8372, "step": 12263 }, { "epoch": 0.9454209065679926, "grad_norm": 3.739797353744507, "learning_rate": 7.792478162333694e-08, "loss": 0.9476, "step": 12264 }, { "epoch": 0.9454979956830095, "grad_norm": 4.127760410308838, "learning_rate": 7.77053821239937e-08, "loss": 0.9036, "step": 12265 }, { "epoch": 0.9455750847980265, "grad_norm": 3.717101812362671, "learning_rate": 7.748628950422666e-08, "loss": 0.8591, "step": 12266 }, { "epoch": 0.9456521739130435, "grad_norm": 3.6435585021972656, "learning_rate": 7.726750377769488e-08, "loss": 0.9294, "step": 12267 }, { "epoch": 0.9457292630280605, "grad_norm": 3.5347166061401367, "learning_rate": 7.704902495803911e-08, "loss": 0.8237, "step": 12268 }, { "epoch": 0.9458063521430774, "grad_norm": 3.792682647705078, "learning_rate": 7.683085305887961e-08, "loss": 0.9064, "step": 12269 }, { "epoch": 0.9458834412580943, "grad_norm": 3.9385581016540527, "learning_rate": 7.661298809381878e-08, "loss": 0.9554, "step": 12270 }, { "epoch": 0.9459605303731113, "grad_norm": 3.6666650772094727, "learning_rate": 7.639543007643913e-08, "loss": 0.898, "step": 12271 }, { "epoch": 0.9460376194881283, "grad_norm": 3.755439043045044, "learning_rate": 7.617817902030478e-08, "loss": 0.892, "step": 12272 }, { "epoch": 0.9461147086031453, "grad_norm": 4.0008225440979, "learning_rate": 7.59612349389599e-08, "loss": 0.8153, "step": 12273 }, { "epoch": 0.9461917977181622, "grad_norm": 3.6859991550445557, "learning_rate": 7.574459784592981e-08, "loss": 0.8639, "step": 12274 }, { "epoch": 0.9462688868331791, "grad_norm": 3.624303102493286, "learning_rate": 7.552826775472033e-08, "loss": 0.8132, "step": 12275 }, { "epoch": 0.9463459759481961, "grad_norm": 4.032271385192871, "learning_rate": 7.531224467881848e-08, "loss": 1.0711, "step": 12276 }, { "epoch": 0.946423065063213, "grad_norm": 3.7532832622528076, "learning_rate": 7.509652863169348e-08, "loss": 0.8494, "step": 12277 }, { "epoch": 
0.9465001541782301, "grad_norm": 4.128763675689697, "learning_rate": 7.48811196267929e-08, "loss": 0.8642, "step": 12278 }, { "epoch": 0.946577243293247, "grad_norm": 3.744234561920166, "learning_rate": 7.466601767754655e-08, "loss": 0.9608, "step": 12279 }, { "epoch": 0.9466543324082639, "grad_norm": 3.6240429878234863, "learning_rate": 7.445122279736484e-08, "loss": 0.8315, "step": 12280 }, { "epoch": 0.9467314215232809, "grad_norm": 3.724736213684082, "learning_rate": 7.423673499963924e-08, "loss": 0.8643, "step": 12281 }, { "epoch": 0.9468085106382979, "grad_norm": 3.973217248916626, "learning_rate": 7.402255429774241e-08, "loss": 0.9679, "step": 12282 }, { "epoch": 0.9468855997533149, "grad_norm": 3.8771355152130127, "learning_rate": 7.380868070502644e-08, "loss": 0.8696, "step": 12283 }, { "epoch": 0.9469626888683318, "grad_norm": 3.8972392082214355, "learning_rate": 7.359511423482679e-08, "loss": 1.005, "step": 12284 }, { "epoch": 0.9470397779833487, "grad_norm": 3.7604198455810547, "learning_rate": 7.338185490045668e-08, "loss": 0.867, "step": 12285 }, { "epoch": 0.9471168670983657, "grad_norm": 3.7321414947509766, "learning_rate": 7.316890271521215e-08, "loss": 0.8899, "step": 12286 }, { "epoch": 0.9471939562133826, "grad_norm": 4.230839252471924, "learning_rate": 7.29562576923698e-08, "loss": 1.034, "step": 12287 }, { "epoch": 0.9472710453283997, "grad_norm": 3.6010680198669434, "learning_rate": 7.274391984518736e-08, "loss": 0.8038, "step": 12288 }, { "epoch": 0.9473481344434166, "grad_norm": 3.6797475814819336, "learning_rate": 7.253188918690257e-08, "loss": 0.9017, "step": 12289 }, { "epoch": 0.9474252235584335, "grad_norm": 3.575794219970703, "learning_rate": 7.232016573073431e-08, "loss": 0.8756, "step": 12290 }, { "epoch": 0.9475023126734505, "grad_norm": 3.789335012435913, "learning_rate": 7.210874948988255e-08, "loss": 0.9401, "step": 12291 }, { "epoch": 0.9475794017884674, "grad_norm": 3.835794448852539, "learning_rate": 7.189764047752789e-08, "loss": 0.9796, "step": 12292 }, { "epoch": 0.9476564909034845, "grad_norm": 3.511427879333496, "learning_rate": 7.168683870683258e-08, "loss": 0.9066, "step": 12293 }, { "epoch": 0.9477335800185014, "grad_norm": 3.5862207412719727, "learning_rate": 7.14763441909383e-08, "loss": 0.9302, "step": 12294 }, { "epoch": 0.9478106691335183, "grad_norm": 3.8762261867523193, "learning_rate": 7.126615694296846e-08, "loss": 0.9133, "step": 12295 }, { "epoch": 0.9478877582485353, "grad_norm": 3.6678431034088135, "learning_rate": 7.105627697602702e-08, "loss": 0.8997, "step": 12296 }, { "epoch": 0.9479648473635522, "grad_norm": 3.454246997833252, "learning_rate": 7.084670430319907e-08, "loss": 0.7763, "step": 12297 }, { "epoch": 0.9480419364785693, "grad_norm": 3.5856475830078125, "learning_rate": 7.063743893755026e-08, "loss": 0.8533, "step": 12298 }, { "epoch": 0.9481190255935862, "grad_norm": 3.7448766231536865, "learning_rate": 7.042848089212794e-08, "loss": 0.9361, "step": 12299 }, { "epoch": 0.9481961147086031, "grad_norm": 3.805290460586548, "learning_rate": 7.021983017995836e-08, "loss": 0.9013, "step": 12300 }, { "epoch": 0.9482732038236201, "grad_norm": 3.3226075172424316, "learning_rate": 7.001148681405056e-08, "loss": 0.7962, "step": 12301 }, { "epoch": 0.948350292938637, "grad_norm": 3.614866256713867, "learning_rate": 6.980345080739303e-08, "loss": 0.7797, "step": 12302 }, { "epoch": 0.9484273820536541, "grad_norm": 3.74783992767334, "learning_rate": 6.959572217295651e-08, "loss": 0.9274, "step": 12303 }, { "epoch": 
0.948504471168671, "grad_norm": 4.042082786560059, "learning_rate": 6.938830092369176e-08, "loss": 1.0048, "step": 12304 }, { "epoch": 0.9485815602836879, "grad_norm": 3.7380220890045166, "learning_rate": 6.918118707253007e-08, "loss": 0.9577, "step": 12305 }, { "epoch": 0.9486586493987049, "grad_norm": 4.00551700592041, "learning_rate": 6.897438063238393e-08, "loss": 0.8565, "step": 12306 }, { "epoch": 0.9487357385137218, "grad_norm": 4.03279972076416, "learning_rate": 6.87678816161469e-08, "loss": 1.0123, "step": 12307 }, { "epoch": 0.9488128276287389, "grad_norm": 3.53090500831604, "learning_rate": 6.856169003669256e-08, "loss": 0.7968, "step": 12308 }, { "epoch": 0.9488899167437558, "grad_norm": 3.888162612915039, "learning_rate": 6.835580590687618e-08, "loss": 0.9289, "step": 12309 }, { "epoch": 0.9489670058587727, "grad_norm": 3.883291721343994, "learning_rate": 6.815022923953418e-08, "loss": 0.8446, "step": 12310 }, { "epoch": 0.9490440949737897, "grad_norm": 4.122531414031982, "learning_rate": 6.794496004748241e-08, "loss": 0.8171, "step": 12311 }, { "epoch": 0.9491211840888066, "grad_norm": 3.75138521194458, "learning_rate": 6.773999834351841e-08, "loss": 0.8468, "step": 12312 }, { "epoch": 0.9491982732038237, "grad_norm": 3.7109479904174805, "learning_rate": 6.753534414042085e-08, "loss": 0.9143, "step": 12313 }, { "epoch": 0.9492753623188406, "grad_norm": 3.7108957767486572, "learning_rate": 6.733099745094896e-08, "loss": 0.9051, "step": 12314 }, { "epoch": 0.9493524514338575, "grad_norm": 3.8768532276153564, "learning_rate": 6.712695828784254e-08, "loss": 0.8712, "step": 12315 }, { "epoch": 0.9494295405488745, "grad_norm": 3.5676753520965576, "learning_rate": 6.6923226663822e-08, "loss": 0.8435, "step": 12316 }, { "epoch": 0.9495066296638914, "grad_norm": 3.522658348083496, "learning_rate": 6.671980259158883e-08, "loss": 0.7721, "step": 12317 }, { "epoch": 0.9495837187789085, "grad_norm": 3.815915584564209, "learning_rate": 6.651668608382622e-08, "loss": 0.9029, "step": 12318 }, { "epoch": 0.9496608078939254, "grad_norm": 3.938446044921875, "learning_rate": 6.631387715319681e-08, "loss": 0.9521, "step": 12319 }, { "epoch": 0.9497378970089423, "grad_norm": 3.745887517929077, "learning_rate": 6.611137581234495e-08, "loss": 0.9028, "step": 12320 }, { "epoch": 0.9498149861239593, "grad_norm": 3.692595958709717, "learning_rate": 6.590918207389608e-08, "loss": 0.8865, "step": 12321 }, { "epoch": 0.9498920752389762, "grad_norm": 4.061500549316406, "learning_rate": 6.57072959504551e-08, "loss": 0.9498, "step": 12322 }, { "epoch": 0.9499691643539933, "grad_norm": 3.645247459411621, "learning_rate": 6.550571745460865e-08, "loss": 0.8695, "step": 12323 }, { "epoch": 0.9500462534690102, "grad_norm": 3.4942803382873535, "learning_rate": 6.530444659892443e-08, "loss": 0.8362, "step": 12324 }, { "epoch": 0.9501233425840271, "grad_norm": 3.572190284729004, "learning_rate": 6.510348339595074e-08, "loss": 0.9694, "step": 12325 }, { "epoch": 0.9502004316990441, "grad_norm": 3.5825698375701904, "learning_rate": 6.490282785821645e-08, "loss": 0.8123, "step": 12326 }, { "epoch": 0.950277520814061, "grad_norm": 3.692664861679077, "learning_rate": 6.470247999823099e-08, "loss": 0.897, "step": 12327 }, { "epoch": 0.950354609929078, "grad_norm": 3.427412271499634, "learning_rate": 6.450243982848548e-08, "loss": 0.8236, "step": 12328 }, { "epoch": 0.950431699044095, "grad_norm": 3.9332494735717773, "learning_rate": 6.430270736145106e-08, "loss": 0.9017, "step": 12329 }, { "epoch": 0.9505087881591119, 
"grad_norm": 3.665562629699707, "learning_rate": 6.410328260957998e-08, "loss": 0.8807, "step": 12330 }, { "epoch": 0.9505858772741289, "grad_norm": 3.466925859451294, "learning_rate": 6.390416558530622e-08, "loss": 0.8508, "step": 12331 }, { "epoch": 0.9506629663891458, "grad_norm": 3.790743112564087, "learning_rate": 6.370535630104257e-08, "loss": 0.8721, "step": 12332 }, { "epoch": 0.9507400555041629, "grad_norm": 3.7184832096099854, "learning_rate": 6.350685476918416e-08, "loss": 0.964, "step": 12333 }, { "epoch": 0.9508171446191798, "grad_norm": 3.781548500061035, "learning_rate": 6.33086610021072e-08, "loss": 0.9842, "step": 12334 }, { "epoch": 0.9508942337341967, "grad_norm": 4.001351356506348, "learning_rate": 6.311077501216677e-08, "loss": 0.9441, "step": 12335 }, { "epoch": 0.9509713228492137, "grad_norm": 3.9916863441467285, "learning_rate": 6.291319681170139e-08, "loss": 0.9009, "step": 12336 }, { "epoch": 0.9510484119642306, "grad_norm": 4.026021480560303, "learning_rate": 6.271592641302781e-08, "loss": 0.9658, "step": 12337 }, { "epoch": 0.9511255010792476, "grad_norm": 3.522988796234131, "learning_rate": 6.251896382844569e-08, "loss": 0.796, "step": 12338 }, { "epoch": 0.9512025901942646, "grad_norm": 3.4949891567230225, "learning_rate": 6.232230907023407e-08, "loss": 0.8099, "step": 12339 }, { "epoch": 0.9512796793092815, "grad_norm": 3.4826712608337402, "learning_rate": 6.21259621506537e-08, "loss": 0.9186, "step": 12340 }, { "epoch": 0.9513567684242985, "grad_norm": 3.609074115753174, "learning_rate": 6.192992308194534e-08, "loss": 0.8708, "step": 12341 }, { "epoch": 0.9514338575393154, "grad_norm": 3.599756956100464, "learning_rate": 6.173419187633201e-08, "loss": 0.7601, "step": 12342 }, { "epoch": 0.9515109466543324, "grad_norm": 3.8272032737731934, "learning_rate": 6.15387685460156e-08, "loss": 0.9467, "step": 12343 }, { "epoch": 0.9515880357693494, "grad_norm": 3.584209680557251, "learning_rate": 6.134365310317969e-08, "loss": 0.898, "step": 12344 }, { "epoch": 0.9516651248843663, "grad_norm": 3.9957098960876465, "learning_rate": 6.114884555998957e-08, "loss": 0.9181, "step": 12345 }, { "epoch": 0.9517422139993833, "grad_norm": 3.9217257499694824, "learning_rate": 6.09543459285894e-08, "loss": 0.8856, "step": 12346 }, { "epoch": 0.9518193031144002, "grad_norm": 3.2795722484588623, "learning_rate": 6.07601542211067e-08, "loss": 0.8311, "step": 12347 }, { "epoch": 0.9518963922294172, "grad_norm": 3.9861719608306885, "learning_rate": 6.05662704496468e-08, "loss": 0.9533, "step": 12348 }, { "epoch": 0.9519734813444342, "grad_norm": 3.6338775157928467, "learning_rate": 6.03726946262978e-08, "loss": 0.8222, "step": 12349 }, { "epoch": 0.9520505704594511, "grad_norm": 4.2912726402282715, "learning_rate": 6.017942676312837e-08, "loss": 0.9267, "step": 12350 }, { "epoch": 0.9521276595744681, "grad_norm": 4.117364883422852, "learning_rate": 5.998646687218779e-08, "loss": 0.9223, "step": 12351 }, { "epoch": 0.952204748689485, "grad_norm": 4.384626865386963, "learning_rate": 5.97938149655064e-08, "loss": 1.0203, "step": 12352 }, { "epoch": 0.952281837804502, "grad_norm": 3.674335241317749, "learning_rate": 5.960147105509406e-08, "loss": 0.8207, "step": 12353 }, { "epoch": 0.952358926919519, "grad_norm": 3.7455525398254395, "learning_rate": 5.9409435152943395e-08, "loss": 0.8943, "step": 12354 }, { "epoch": 0.9524360160345359, "grad_norm": 3.3691940307617188, "learning_rate": 5.9217707271025937e-08, "loss": 0.899, "step": 12355 }, { "epoch": 0.9525131051495529, "grad_norm": 
3.5628163814544678, "learning_rate": 5.9026287421296014e-08, "loss": 0.8902, "step": 12356 }, { "epoch": 0.9525901942645698, "grad_norm": 3.971393585205078, "learning_rate": 5.883517561568686e-08, "loss": 0.8993, "step": 12357 }, { "epoch": 0.9526672833795868, "grad_norm": 3.777592420578003, "learning_rate": 5.8644371866113934e-08, "loss": 0.8346, "step": 12358 }, { "epoch": 0.9527443724946038, "grad_norm": 3.8741729259490967, "learning_rate": 5.845387618447218e-08, "loss": 0.8584, "step": 12359 }, { "epoch": 0.9528214616096207, "grad_norm": 3.5481209754943848, "learning_rate": 5.82636885826382e-08, "loss": 0.802, "step": 12360 }, { "epoch": 0.9528985507246377, "grad_norm": 3.961042642593384, "learning_rate": 5.807380907246918e-08, "loss": 0.9015, "step": 12361 }, { "epoch": 0.9529756398396546, "grad_norm": 3.8133602142333984, "learning_rate": 5.788423766580342e-08, "loss": 0.7618, "step": 12362 }, { "epoch": 0.9530527289546716, "grad_norm": 3.639708995819092, "learning_rate": 5.769497437446037e-08, "loss": 0.794, "step": 12363 }, { "epoch": 0.9531298180696886, "grad_norm": 3.4450173377990723, "learning_rate": 5.7506019210237794e-08, "loss": 0.8323, "step": 12364 }, { "epoch": 0.9532069071847055, "grad_norm": 3.6423511505126953, "learning_rate": 5.73173721849174e-08, "loss": 0.8228, "step": 12365 }, { "epoch": 0.9532839962997225, "grad_norm": 3.7283174991607666, "learning_rate": 5.7129033310260316e-08, "loss": 0.9807, "step": 12366 }, { "epoch": 0.9533610854147394, "grad_norm": 3.4573850631713867, "learning_rate": 5.694100259800772e-08, "loss": 0.8502, "step": 12367 }, { "epoch": 0.9534381745297564, "grad_norm": 3.561988592147827, "learning_rate": 5.675328005988301e-08, "loss": 0.9494, "step": 12368 }, { "epoch": 0.9535152636447733, "grad_norm": 3.7618987560272217, "learning_rate": 5.6565865707590153e-08, "loss": 0.9674, "step": 12369 }, { "epoch": 0.9535923527597903, "grad_norm": 3.7699737548828125, "learning_rate": 5.637875955281202e-08, "loss": 0.7715, "step": 12370 }, { "epoch": 0.9536694418748073, "grad_norm": 3.5663082599639893, "learning_rate": 5.6191961607214853e-08, "loss": 0.8386, "step": 12371 }, { "epoch": 0.9537465309898242, "grad_norm": 3.4624545574188232, "learning_rate": 5.6005471882444316e-08, "loss": 0.8488, "step": 12372 }, { "epoch": 0.9538236201048412, "grad_norm": 4.092374801635742, "learning_rate": 5.581929039012668e-08, "loss": 0.9231, "step": 12373 }, { "epoch": 0.9539007092198581, "grad_norm": 4.778739929199219, "learning_rate": 5.563341714186987e-08, "loss": 1.0022, "step": 12374 }, { "epoch": 0.9539777983348752, "grad_norm": 3.724924325942993, "learning_rate": 5.5447852149262406e-08, "loss": 0.9458, "step": 12375 }, { "epoch": 0.9540548874498921, "grad_norm": 3.464205026626587, "learning_rate": 5.5262595423872244e-08, "loss": 0.8716, "step": 12376 }, { "epoch": 0.954131976564909, "grad_norm": 3.98112416267395, "learning_rate": 5.507764697725015e-08, "loss": 1.0144, "step": 12377 }, { "epoch": 0.954209065679926, "grad_norm": 3.9500741958618164, "learning_rate": 5.4893006820926355e-08, "loss": 0.9449, "step": 12378 }, { "epoch": 0.9542861547949429, "grad_norm": 3.776728391647339, "learning_rate": 5.4708674966412744e-08, "loss": 0.8444, "step": 12379 }, { "epoch": 0.95436324390996, "grad_norm": 4.004519939422607, "learning_rate": 5.4524651425200135e-08, "loss": 0.8892, "step": 12380 }, { "epoch": 0.9544403330249769, "grad_norm": 3.8165571689605713, "learning_rate": 5.434093620876213e-08, "loss": 0.9224, "step": 12381 }, { "epoch": 0.9545174221399938, "grad_norm": 
3.499563217163086, "learning_rate": 5.4157529328552896e-08, "loss": 0.7655, "step": 12382 }, { "epoch": 0.9545945112550108, "grad_norm": 3.9584450721740723, "learning_rate": 5.397443079600662e-08, "loss": 0.8664, "step": 12383 }, { "epoch": 0.9546716003700277, "grad_norm": 4.017448425292969, "learning_rate": 5.379164062253861e-08, "loss": 0.9921, "step": 12384 }, { "epoch": 0.9547486894850448, "grad_norm": 3.8151862621307373, "learning_rate": 5.3609158819544205e-08, "loss": 0.8458, "step": 12385 }, { "epoch": 0.9548257786000617, "grad_norm": 3.502845287322998, "learning_rate": 5.3426985398400965e-08, "loss": 0.861, "step": 12386 }, { "epoch": 0.9549028677150786, "grad_norm": 3.5837574005126953, "learning_rate": 5.324512037046647e-08, "loss": 0.9356, "step": 12387 }, { "epoch": 0.9549799568300956, "grad_norm": 3.933488368988037, "learning_rate": 5.306356374707833e-08, "loss": 0.9829, "step": 12388 }, { "epoch": 0.9550570459451125, "grad_norm": 3.528136968612671, "learning_rate": 5.288231553955636e-08, "loss": 0.9494, "step": 12389 }, { "epoch": 0.9551341350601296, "grad_norm": 3.7610669136047363, "learning_rate": 5.270137575920098e-08, "loss": 0.9087, "step": 12390 }, { "epoch": 0.9552112241751465, "grad_norm": 3.8250885009765625, "learning_rate": 5.2520744417290936e-08, "loss": 0.9419, "step": 12391 }, { "epoch": 0.9552883132901634, "grad_norm": 3.721879720687866, "learning_rate": 5.2340421525089445e-08, "loss": 0.9422, "step": 12392 }, { "epoch": 0.9553654024051804, "grad_norm": 3.6027677059173584, "learning_rate": 5.2160407093838074e-08, "loss": 0.9309, "step": 12393 }, { "epoch": 0.9554424915201973, "grad_norm": 3.6538522243499756, "learning_rate": 5.19807011347595e-08, "loss": 0.9611, "step": 12394 }, { "epoch": 0.9555195806352144, "grad_norm": 4.078005790710449, "learning_rate": 5.180130365905811e-08, "loss": 0.9666, "step": 12395 }, { "epoch": 0.9555966697502313, "grad_norm": 3.643521785736084, "learning_rate": 5.162221467791772e-08, "loss": 0.8401, "step": 12396 }, { "epoch": 0.9556737588652482, "grad_norm": 3.5786333084106445, "learning_rate": 5.1443434202504414e-08, "loss": 0.8743, "step": 12397 }, { "epoch": 0.9557508479802652, "grad_norm": 3.833845853805542, "learning_rate": 5.1264962243963155e-08, "loss": 0.9121, "step": 12398 }, { "epoch": 0.9558279370952821, "grad_norm": 3.96075439453125, "learning_rate": 5.1086798813421715e-08, "loss": 0.9589, "step": 12399 }, { "epoch": 0.9559050262102992, "grad_norm": 3.644322156906128, "learning_rate": 5.090894392198731e-08, "loss": 0.9057, "step": 12400 }, { "epoch": 0.9559821153253161, "grad_norm": 3.6770665645599365, "learning_rate": 5.07313975807483e-08, "loss": 0.8914, "step": 12401 }, { "epoch": 0.956059204440333, "grad_norm": 3.862185001373291, "learning_rate": 5.0554159800773604e-08, "loss": 0.7902, "step": 12402 }, { "epoch": 0.95613629355535, "grad_norm": 3.8168914318084717, "learning_rate": 5.037723059311273e-08, "loss": 0.8907, "step": 12403 }, { "epoch": 0.9562133826703669, "grad_norm": 3.6184260845184326, "learning_rate": 5.0200609968797386e-08, "loss": 0.886, "step": 12404 }, { "epoch": 0.956290471785384, "grad_norm": 3.8705575466156006, "learning_rate": 5.0024297938838227e-08, "loss": 0.9254, "step": 12405 }, { "epoch": 0.9563675609004009, "grad_norm": 3.5495188236236572, "learning_rate": 4.9848294514227566e-08, "loss": 0.88, "step": 12406 }, { "epoch": 0.9564446500154178, "grad_norm": 3.8909826278686523, "learning_rate": 4.9672599705938294e-08, "loss": 1.0299, "step": 12407 }, { "epoch": 0.9565217391304348, "grad_norm": 
3.9004476070404053, "learning_rate": 4.949721352492387e-08, "loss": 0.9281, "step": 12408 }, { "epoch": 0.9565988282454517, "grad_norm": 3.852919816970825, "learning_rate": 4.9322135982118877e-08, "loss": 0.9431, "step": 12409 }, { "epoch": 0.9566759173604688, "grad_norm": 3.943549633026123, "learning_rate": 4.914736708843848e-08, "loss": 0.905, "step": 12410 }, { "epoch": 0.9567530064754857, "grad_norm": 3.6017589569091797, "learning_rate": 4.89729068547784e-08, "loss": 0.9505, "step": 12411 }, { "epoch": 0.9568300955905026, "grad_norm": 3.4761962890625, "learning_rate": 4.8798755292016055e-08, "loss": 0.8673, "step": 12412 }, { "epoch": 0.9569071847055196, "grad_norm": 3.7245309352874756, "learning_rate": 4.8624912411007754e-08, "loss": 0.8425, "step": 12413 }, { "epoch": 0.9569842738205365, "grad_norm": 3.6705353260040283, "learning_rate": 4.8451378222592605e-08, "loss": 0.8727, "step": 12414 }, { "epoch": 0.9570613629355536, "grad_norm": 3.829969644546509, "learning_rate": 4.827815273758973e-08, "loss": 1.01, "step": 12415 }, { "epoch": 0.9571384520505705, "grad_norm": 3.7990779876708984, "learning_rate": 4.8105235966797727e-08, "loss": 0.8892, "step": 12416 }, { "epoch": 0.9572155411655874, "grad_norm": 3.6120383739471436, "learning_rate": 4.793262792099851e-08, "loss": 0.8526, "step": 12417 }, { "epoch": 0.9572926302806044, "grad_norm": 3.7359561920166016, "learning_rate": 4.776032861095181e-08, "loss": 0.9674, "step": 12418 }, { "epoch": 0.9573697193956213, "grad_norm": 3.7942633628845215, "learning_rate": 4.758833804740015e-08, "loss": 0.8739, "step": 12419 }, { "epoch": 0.9574468085106383, "grad_norm": 4.12003755569458, "learning_rate": 4.7416656241067175e-08, "loss": 1.0081, "step": 12420 }, { "epoch": 0.9575238976256553, "grad_norm": 3.9636361598968506, "learning_rate": 4.724528320265542e-08, "loss": 0.9851, "step": 12421 }, { "epoch": 0.9576009867406722, "grad_norm": 3.665433406829834, "learning_rate": 4.7074218942849134e-08, "loss": 0.9087, "step": 12422 }, { "epoch": 0.9576780758556892, "grad_norm": 3.686065673828125, "learning_rate": 4.6903463472313114e-08, "loss": 0.8049, "step": 12423 }, { "epoch": 0.9577551649707061, "grad_norm": 3.6736299991607666, "learning_rate": 4.6733016801693845e-08, "loss": 0.9044, "step": 12424 }, { "epoch": 0.9578322540857231, "grad_norm": 3.865446090698242, "learning_rate": 4.6562878941617264e-08, "loss": 1.0022, "step": 12425 }, { "epoch": 0.9579093432007401, "grad_norm": 3.7858636379241943, "learning_rate": 4.6393049902690446e-08, "loss": 0.8994, "step": 12426 }, { "epoch": 0.957986432315757, "grad_norm": 3.769582986831665, "learning_rate": 4.6223529695502144e-08, "loss": 0.8945, "step": 12427 }, { "epoch": 0.958063521430774, "grad_norm": 3.338371753692627, "learning_rate": 4.605431833062002e-08, "loss": 0.8453, "step": 12428 }, { "epoch": 0.9581406105457909, "grad_norm": 4.088877201080322, "learning_rate": 4.588541581859396e-08, "loss": 0.9191, "step": 12429 }, { "epoch": 0.9582176996608079, "grad_norm": 3.5991716384887695, "learning_rate": 4.5716822169954436e-08, "loss": 0.9485, "step": 12430 }, { "epoch": 0.9582947887758249, "grad_norm": 3.5620510578155518, "learning_rate": 4.554853739521192e-08, "loss": 0.9463, "step": 12431 }, { "epoch": 0.9583718778908418, "grad_norm": 3.7672712802886963, "learning_rate": 4.5380561504858586e-08, "loss": 0.87, "step": 12432 }, { "epoch": 0.9584489670058588, "grad_norm": 4.017693996429443, "learning_rate": 4.521289450936661e-08, "loss": 0.9304, "step": 12433 }, { "epoch": 0.9585260561208757, "grad_norm": 
3.617096185684204, "learning_rate": 4.504553641918874e-08, "loss": 0.8937, "step": 12434 }, { "epoch": 0.9586031452358927, "grad_norm": 3.6982741355895996, "learning_rate": 4.4878487244759964e-08, "loss": 0.8577, "step": 12435 }, { "epoch": 0.9586802343509097, "grad_norm": 3.6797823905944824, "learning_rate": 4.471174699649361e-08, "loss": 0.8549, "step": 12436 }, { "epoch": 0.9587573234659266, "grad_norm": 4.134285926818848, "learning_rate": 4.4545315684785815e-08, "loss": 0.9191, "step": 12437 }, { "epoch": 0.9588344125809436, "grad_norm": 3.8390088081359863, "learning_rate": 4.437919332001328e-08, "loss": 0.9041, "step": 12438 }, { "epoch": 0.9589115016959605, "grad_norm": 3.7085273265838623, "learning_rate": 4.421337991253161e-08, "loss": 0.9955, "step": 12439 }, { "epoch": 0.9589885908109775, "grad_norm": 4.025529384613037, "learning_rate": 4.4047875472679194e-08, "loss": 1.0332, "step": 12440 }, { "epoch": 0.9590656799259945, "grad_norm": 3.5782909393310547, "learning_rate": 4.3882680010774445e-08, "loss": 0.988, "step": 12441 }, { "epoch": 0.9591427690410114, "grad_norm": 3.8593509197235107, "learning_rate": 4.3717793537115806e-08, "loss": 0.9466, "step": 12442 }, { "epoch": 0.9592198581560284, "grad_norm": 3.907071590423584, "learning_rate": 4.355321606198393e-08, "loss": 0.8672, "step": 12443 }, { "epoch": 0.9592969472710453, "grad_norm": 4.352113246917725, "learning_rate": 4.338894759563894e-08, "loss": 0.9618, "step": 12444 }, { "epoch": 0.9593740363860623, "grad_norm": 3.6431379318237305, "learning_rate": 4.3224988148321545e-08, "loss": 0.8185, "step": 12445 }, { "epoch": 0.9594511255010792, "grad_norm": 3.652925729751587, "learning_rate": 4.306133773025467e-08, "loss": 0.8715, "step": 12446 }, { "epoch": 0.9595282146160962, "grad_norm": 3.6305699348449707, "learning_rate": 4.2897996351640715e-08, "loss": 0.7884, "step": 12447 }, { "epoch": 0.9596053037311132, "grad_norm": 3.712374448776245, "learning_rate": 4.2734964022663194e-08, "loss": 0.8094, "step": 12448 }, { "epoch": 0.9596823928461301, "grad_norm": 3.571690797805786, "learning_rate": 4.257224075348676e-08, "loss": 0.7927, "step": 12449 }, { "epoch": 0.9597594819611471, "grad_norm": 3.573230743408203, "learning_rate": 4.240982655425552e-08, "loss": 0.8144, "step": 12450 }, { "epoch": 0.959836571076164, "grad_norm": 4.072946548461914, "learning_rate": 4.224772143509526e-08, "loss": 0.9237, "step": 12451 }, { "epoch": 0.959913660191181, "grad_norm": 3.8101742267608643, "learning_rate": 4.2085925406112894e-08, "loss": 0.9525, "step": 12452 }, { "epoch": 0.959990749306198, "grad_norm": 3.8451669216156006, "learning_rate": 4.192443847739536e-08, "loss": 0.8578, "step": 12453 }, { "epoch": 0.9600678384212149, "grad_norm": 3.733152389526367, "learning_rate": 4.1763260659011265e-08, "loss": 0.9414, "step": 12454 }, { "epoch": 0.9601449275362319, "grad_norm": 3.55338716506958, "learning_rate": 4.160239196100757e-08, "loss": 0.8958, "step": 12455 }, { "epoch": 0.9602220166512488, "grad_norm": 3.659872531890869, "learning_rate": 4.144183239341515e-08, "loss": 0.9185, "step": 12456 }, { "epoch": 0.9602991057662658, "grad_norm": 3.818180561065674, "learning_rate": 4.1281581966243214e-08, "loss": 0.9281, "step": 12457 }, { "epoch": 0.9603761948812828, "grad_norm": 4.13081169128418, "learning_rate": 4.1121640689482655e-08, "loss": 0.992, "step": 12458 }, { "epoch": 0.9604532839962997, "grad_norm": 3.2538528442382812, "learning_rate": 4.0962008573105505e-08, "loss": 0.7597, "step": 12459 }, { "epoch": 0.9605303731113167, "grad_norm": 
3.9792425632476807, "learning_rate": 4.080268562706324e-08, "loss": 0.8961, "step": 12460 }, { "epoch": 0.9606074622263336, "grad_norm": 4.055764198303223, "learning_rate": 4.0643671861289035e-08, "loss": 0.8922, "step": 12461 }, { "epoch": 0.9606845513413506, "grad_norm": 3.6515095233917236, "learning_rate": 4.048496728569717e-08, "loss": 0.8648, "step": 12462 }, { "epoch": 0.9607616404563676, "grad_norm": 3.2547214031219482, "learning_rate": 4.0326571910180855e-08, "loss": 0.7415, "step": 12463 }, { "epoch": 0.9608387295713845, "grad_norm": 3.92317271232605, "learning_rate": 4.016848574461718e-08, "loss": 0.9378, "step": 12464 }, { "epoch": 0.9609158186864015, "grad_norm": 3.4518179893493652, "learning_rate": 4.001070879885993e-08, "loss": 0.7162, "step": 12465 }, { "epoch": 0.9609929078014184, "grad_norm": 3.502856969833374, "learning_rate": 3.9853241082746795e-08, "loss": 0.8978, "step": 12466 }, { "epoch": 0.9610699969164354, "grad_norm": 3.2858731746673584, "learning_rate": 3.9696082606094366e-08, "loss": 0.8471, "step": 12467 }, { "epoch": 0.9611470860314524, "grad_norm": 3.7062535285949707, "learning_rate": 3.953923337870147e-08, "loss": 0.893, "step": 12468 }, { "epoch": 0.9612241751464693, "grad_norm": 4.334238529205322, "learning_rate": 3.938269341034695e-08, "loss": 0.9144, "step": 12469 }, { "epoch": 0.9613012642614863, "grad_norm": 3.6347129344940186, "learning_rate": 3.922646271078911e-08, "loss": 0.8314, "step": 12470 }, { "epoch": 0.9613783533765032, "grad_norm": 4.064388751983643, "learning_rate": 3.907054128976906e-08, "loss": 0.9495, "step": 12471 }, { "epoch": 0.9614554424915202, "grad_norm": 3.732004165649414, "learning_rate": 3.8914929157007895e-08, "loss": 0.9922, "step": 12472 }, { "epoch": 0.9615325316065372, "grad_norm": 3.932603120803833, "learning_rate": 3.87596263222062e-08, "loss": 0.8979, "step": 12473 }, { "epoch": 0.9616096207215541, "grad_norm": 3.8675358295440674, "learning_rate": 3.860463279504678e-08, "loss": 0.9025, "step": 12474 }, { "epoch": 0.9616867098365711, "grad_norm": 3.780653715133667, "learning_rate": 3.8449948585193577e-08, "loss": 0.9724, "step": 12475 }, { "epoch": 0.961763798951588, "grad_norm": 3.6268699169158936, "learning_rate": 3.829557370228887e-08, "loss": 0.8795, "step": 12476 }, { "epoch": 0.961840888066605, "grad_norm": 3.926316976547241, "learning_rate": 3.814150815595774e-08, "loss": 0.8567, "step": 12477 }, { "epoch": 0.961917977181622, "grad_norm": 4.029868125915527, "learning_rate": 3.798775195580584e-08, "loss": 0.837, "step": 12478 }, { "epoch": 0.9619950662966389, "grad_norm": 3.769047260284424, "learning_rate": 3.783430511141828e-08, "loss": 0.7122, "step": 12479 }, { "epoch": 0.9620721554116559, "grad_norm": 3.4524729251861572, "learning_rate": 3.76811676323624e-08, "loss": 0.7635, "step": 12480 }, { "epoch": 0.9621492445266728, "grad_norm": 4.048440933227539, "learning_rate": 3.752833952818502e-08, "loss": 0.8472, "step": 12481 }, { "epoch": 0.9622263336416897, "grad_norm": 3.5961873531341553, "learning_rate": 3.737582080841462e-08, "loss": 0.9834, "step": 12482 }, { "epoch": 0.9623034227567068, "grad_norm": 4.128652095794678, "learning_rate": 3.722361148255971e-08, "loss": 0.8462, "step": 12483 }, { "epoch": 0.9623805118717237, "grad_norm": 3.8181169033050537, "learning_rate": 3.707171156010936e-08, "loss": 0.9219, "step": 12484 }, { "epoch": 0.9624576009867407, "grad_norm": 3.621793031692505, "learning_rate": 3.692012105053433e-08, "loss": 0.8931, "step": 12485 }, { "epoch": 0.9625346901017576, "grad_norm": 
3.9379565715789795, "learning_rate": 3.6768839963285395e-08, "loss": 0.8306, "step": 12486 }, { "epoch": 0.9626117792167745, "grad_norm": 3.8023934364318848, "learning_rate": 3.66178683077939e-08, "loss": 0.97, "step": 12487 }, { "epoch": 0.9626888683317916, "grad_norm": 3.626626968383789, "learning_rate": 3.646720609347232e-08, "loss": 0.9197, "step": 12488 }, { "epoch": 0.9627659574468085, "grad_norm": 3.600571393966675, "learning_rate": 3.6316853329713686e-08, "loss": 0.9092, "step": 12489 }, { "epoch": 0.9628430465618255, "grad_norm": 3.4085299968719482, "learning_rate": 3.616681002589162e-08, "loss": 0.7884, "step": 12490 }, { "epoch": 0.9629201356768424, "grad_norm": 3.688203811645508, "learning_rate": 3.601707619136086e-08, "loss": 0.7688, "step": 12491 }, { "epoch": 0.9629972247918593, "grad_norm": 3.6305623054504395, "learning_rate": 3.586765183545615e-08, "loss": 0.9648, "step": 12492 }, { "epoch": 0.9630743139068764, "grad_norm": 3.8015410900115967, "learning_rate": 3.571853696749339e-08, "loss": 0.9645, "step": 12493 }, { "epoch": 0.9631514030218933, "grad_norm": 3.945363998413086, "learning_rate": 3.556973159676902e-08, "loss": 0.92, "step": 12494 }, { "epoch": 0.9632284921369103, "grad_norm": 3.6869564056396484, "learning_rate": 3.542123573256062e-08, "loss": 0.8454, "step": 12495 }, { "epoch": 0.9633055812519272, "grad_norm": 4.102055549621582, "learning_rate": 3.5273049384126345e-08, "loss": 0.9861, "step": 12496 }, { "epoch": 0.9633826703669441, "grad_norm": 4.952552318572998, "learning_rate": 3.51251725607038e-08, "loss": 0.8793, "step": 12497 }, { "epoch": 0.9634597594819612, "grad_norm": 3.5862975120544434, "learning_rate": 3.497760527151284e-08, "loss": 0.8908, "step": 12498 }, { "epoch": 0.9635368485969781, "grad_norm": 3.9423680305480957, "learning_rate": 3.4830347525754425e-08, "loss": 0.8734, "step": 12499 }, { "epoch": 0.9636139377119951, "grad_norm": 4.143372058868408, "learning_rate": 3.4683399332607893e-08, "loss": 0.9683, "step": 12500 }, { "epoch": 0.963691026827012, "grad_norm": 3.9046006202697754, "learning_rate": 3.45367607012359e-08, "loss": 0.9009, "step": 12501 }, { "epoch": 0.9637681159420289, "grad_norm": 3.8234989643096924, "learning_rate": 3.439043164078004e-08, "loss": 1.0309, "step": 12502 }, { "epoch": 0.963845205057046, "grad_norm": 3.920905828475952, "learning_rate": 3.424441216036301e-08, "loss": 0.9936, "step": 12503 }, { "epoch": 0.9639222941720629, "grad_norm": 3.868464231491089, "learning_rate": 3.409870226908863e-08, "loss": 0.8631, "step": 12504 }, { "epoch": 0.9639993832870799, "grad_norm": 3.8888497352600098, "learning_rate": 3.3953301976040743e-08, "loss": 0.9221, "step": 12505 }, { "epoch": 0.9640764724020968, "grad_norm": 3.7030375003814697, "learning_rate": 3.3808211290284886e-08, "loss": 0.8446, "step": 12506 }, { "epoch": 0.9641535615171137, "grad_norm": 4.315587520599365, "learning_rate": 3.36634302208666e-08, "loss": 0.9185, "step": 12507 }, { "epoch": 0.9642306506321308, "grad_norm": 3.6543986797332764, "learning_rate": 3.351895877681255e-08, "loss": 0.9264, "step": 12508 }, { "epoch": 0.9643077397471477, "grad_norm": 3.710772752761841, "learning_rate": 3.3374796967128866e-08, "loss": 0.9397, "step": 12509 }, { "epoch": 0.9643848288621647, "grad_norm": 3.6390933990478516, "learning_rate": 3.3230944800803355e-08, "loss": 0.8503, "step": 12510 }, { "epoch": 0.9644619179771816, "grad_norm": 4.070563793182373, "learning_rate": 3.3087402286805514e-08, "loss": 0.8884, "step": 12511 }, { "epoch": 0.9645390070921985, "grad_norm": 
3.739609718322754, "learning_rate": 3.294416943408374e-08, "loss": 0.9062, "step": 12512 }, { "epoch": 0.9646160962072156, "grad_norm": 3.311293840408325, "learning_rate": 3.2801246251568106e-08, "loss": 0.8365, "step": 12513 }, { "epoch": 0.9646931853222325, "grad_norm": 3.451474666595459, "learning_rate": 3.2658632748168714e-08, "loss": 0.829, "step": 12514 }, { "epoch": 0.9647702744372495, "grad_norm": 3.717940092086792, "learning_rate": 3.2516328932776786e-08, "loss": 0.8624, "step": 12515 }, { "epoch": 0.9648473635522664, "grad_norm": 3.5586538314819336, "learning_rate": 3.2374334814265216e-08, "loss": 0.8706, "step": 12516 }, { "epoch": 0.9649244526672833, "grad_norm": 3.5937881469726562, "learning_rate": 3.223265040148527e-08, "loss": 0.9791, "step": 12517 }, { "epoch": 0.9650015417823004, "grad_norm": 3.6897778511047363, "learning_rate": 3.209127570327153e-08, "loss": 0.8629, "step": 12518 }, { "epoch": 0.9650786308973173, "grad_norm": 3.6691691875457764, "learning_rate": 3.19502107284364e-08, "loss": 0.9409, "step": 12519 }, { "epoch": 0.9651557200123343, "grad_norm": 3.7458150386810303, "learning_rate": 3.180945548577619e-08, "loss": 0.9058, "step": 12520 }, { "epoch": 0.9652328091273512, "grad_norm": 3.7683491706848145, "learning_rate": 3.166900998406497e-08, "loss": 0.9012, "step": 12521 }, { "epoch": 0.9653098982423681, "grad_norm": 3.705265522003174, "learning_rate": 3.1528874232059635e-08, "loss": 0.813, "step": 12522 }, { "epoch": 0.9653869873573852, "grad_norm": 3.670219898223877, "learning_rate": 3.138904823849653e-08, "loss": 0.8558, "step": 12523 }, { "epoch": 0.9654640764724021, "grad_norm": 3.741985321044922, "learning_rate": 3.124953201209313e-08, "loss": 0.9858, "step": 12524 }, { "epoch": 0.9655411655874191, "grad_norm": 3.5063395500183105, "learning_rate": 3.1110325561547473e-08, "loss": 0.7742, "step": 12525 }, { "epoch": 0.965618254702436, "grad_norm": 3.4746077060699463, "learning_rate": 3.097142889553872e-08, "loss": 0.8353, "step": 12526 }, { "epoch": 0.9656953438174529, "grad_norm": 3.630375385284424, "learning_rate": 3.083284202272607e-08, "loss": 0.84, "step": 12527 }, { "epoch": 0.96577243293247, "grad_norm": 4.338189125061035, "learning_rate": 3.069456495174983e-08, "loss": 0.8878, "step": 12528 }, { "epoch": 0.9658495220474869, "grad_norm": 3.834465503692627, "learning_rate": 3.0556597691230317e-08, "loss": 0.8431, "step": 12529 }, { "epoch": 0.9659266111625039, "grad_norm": 3.5252437591552734, "learning_rate": 3.04189402497701e-08, "loss": 0.7253, "step": 12530 }, { "epoch": 0.9660037002775208, "grad_norm": 3.7228147983551025, "learning_rate": 3.028159263595065e-08, "loss": 0.9094, "step": 12531 }, { "epoch": 0.9660807893925377, "grad_norm": 4.035754680633545, "learning_rate": 3.0144554858334563e-08, "loss": 0.994, "step": 12532 }, { "epoch": 0.9661578785075547, "grad_norm": 4.05665922164917, "learning_rate": 3.000782692546667e-08, "loss": 0.869, "step": 12533 }, { "epoch": 0.9662349676225717, "grad_norm": 3.910731554031372, "learning_rate": 2.9871408845870144e-08, "loss": 0.9663, "step": 12534 }, { "epoch": 0.9663120567375887, "grad_norm": 4.1619696617126465, "learning_rate": 2.973530062805041e-08, "loss": 0.8635, "step": 12535 }, { "epoch": 0.9663891458526056, "grad_norm": 3.829831838607788, "learning_rate": 2.959950228049291e-08, "loss": 0.9295, "step": 12536 }, { "epoch": 0.9664662349676225, "grad_norm": 3.9368739128112793, "learning_rate": 2.9464013811664192e-08, "loss": 0.8815, "step": 12537 }, { "epoch": 0.9665433240826395, "grad_norm": 
3.4988670349121094, "learning_rate": 2.9328835230011398e-08, "loss": 0.8505, "step": 12538 }, { "epoch": 0.9666204131976565, "grad_norm": 3.812965154647827, "learning_rate": 2.9193966543961673e-08, "loss": 0.8958, "step": 12539 }, { "epoch": 0.9666975023126735, "grad_norm": 3.911647081375122, "learning_rate": 2.905940776192384e-08, "loss": 0.9759, "step": 12540 }, { "epoch": 0.9667745914276904, "grad_norm": 3.842432975769043, "learning_rate": 2.89251588922862e-08, "loss": 0.9148, "step": 12541 }, { "epoch": 0.9668516805427073, "grad_norm": 3.5029232501983643, "learning_rate": 2.8791219943419824e-08, "loss": 0.7411, "step": 12542 }, { "epoch": 0.9669287696577243, "grad_norm": 3.9447033405303955, "learning_rate": 2.8657590923673596e-08, "loss": 0.9414, "step": 12543 }, { "epoch": 0.9670058587727413, "grad_norm": 3.7002828121185303, "learning_rate": 2.852427184138029e-08, "loss": 0.9014, "step": 12544 }, { "epoch": 0.9670829478877583, "grad_norm": 3.4802746772766113, "learning_rate": 2.839126270484993e-08, "loss": 0.8871, "step": 12545 }, { "epoch": 0.9671600370027752, "grad_norm": 3.994755744934082, "learning_rate": 2.825856352237588e-08, "loss": 0.8622, "step": 12546 }, { "epoch": 0.9672371261177922, "grad_norm": 3.6245877742767334, "learning_rate": 2.812617430223097e-08, "loss": 0.802, "step": 12547 }, { "epoch": 0.9673142152328091, "grad_norm": 4.391573429107666, "learning_rate": 2.7994095052669147e-08, "loss": 0.9172, "step": 12548 }, { "epoch": 0.967391304347826, "grad_norm": 3.789491653442383, "learning_rate": 2.786232578192494e-08, "loss": 0.9283, "step": 12549 }, { "epoch": 0.9674683934628431, "grad_norm": 3.5741639137268066, "learning_rate": 2.773086649821344e-08, "loss": 0.9144, "step": 12550 }, { "epoch": 0.96754548257786, "grad_norm": 3.636545181274414, "learning_rate": 2.7599717209730316e-08, "loss": 0.823, "step": 12551 }, { "epoch": 0.967622571692877, "grad_norm": 3.7562508583068848, "learning_rate": 2.7468877924651804e-08, "loss": 0.9054, "step": 12552 }, { "epoch": 0.9676996608078939, "grad_norm": 3.873007297515869, "learning_rate": 2.733834865113527e-08, "loss": 0.9583, "step": 12553 }, { "epoch": 0.9677767499229109, "grad_norm": 3.7551496028900146, "learning_rate": 2.7208129397319206e-08, "loss": 0.8355, "step": 12554 }, { "epoch": 0.9678538390379279, "grad_norm": 3.8303260803222656, "learning_rate": 2.7078220171321556e-08, "loss": 0.8735, "step": 12555 }, { "epoch": 0.9679309281529448, "grad_norm": 3.5815868377685547, "learning_rate": 2.6948620981240848e-08, "loss": 0.7389, "step": 12556 }, { "epoch": 0.9680080172679618, "grad_norm": 3.6111199855804443, "learning_rate": 2.6819331835157836e-08, "loss": 0.7886, "step": 12557 }, { "epoch": 0.9680851063829787, "grad_norm": 3.4187819957733154, "learning_rate": 2.669035274113274e-08, "loss": 0.8699, "step": 12558 }, { "epoch": 0.9681621954979956, "grad_norm": 3.8764898777008057, "learning_rate": 2.6561683707206354e-08, "loss": 0.921, "step": 12559 }, { "epoch": 0.9682392846130127, "grad_norm": 3.894237756729126, "learning_rate": 2.6433324741401702e-08, "loss": 0.9434, "step": 12560 }, { "epoch": 0.9683163737280296, "grad_norm": 3.7127158641815186, "learning_rate": 2.6305275851720158e-08, "loss": 0.8949, "step": 12561 }, { "epoch": 0.9683934628430466, "grad_norm": 3.729252576828003, "learning_rate": 2.6177537046144786e-08, "loss": 0.8965, "step": 12562 }, { "epoch": 0.9684705519580635, "grad_norm": 4.151895523071289, "learning_rate": 2.6050108332640323e-08, "loss": 1.0355, "step": 12563 }, { "epoch": 0.9685476410730804, 
"grad_norm": 3.9273674488067627, "learning_rate": 2.5922989719150414e-08, "loss": 0.8994, "step": 12564 }, { "epoch": 0.9686247301880975, "grad_norm": 3.550187349319458, "learning_rate": 2.5796181213601503e-08, "loss": 0.9353, "step": 12565 }, { "epoch": 0.9687018193031144, "grad_norm": 3.781679153442383, "learning_rate": 2.5669682823897813e-08, "loss": 0.9055, "step": 12566 }, { "epoch": 0.9687789084181314, "grad_norm": 3.9291346073150635, "learning_rate": 2.554349455792693e-08, "loss": 0.8633, "step": 12567 }, { "epoch": 0.9688559975331483, "grad_norm": 4.16334867477417, "learning_rate": 2.541761642355589e-08, "loss": 0.9427, "step": 12568 }, { "epoch": 0.9689330866481652, "grad_norm": 3.531303882598877, "learning_rate": 2.529204842863231e-08, "loss": 0.773, "step": 12569 }, { "epoch": 0.9690101757631823, "grad_norm": 3.517418146133423, "learning_rate": 2.5166790580984923e-08, "loss": 0.8888, "step": 12570 }, { "epoch": 0.9690872648781992, "grad_norm": 3.5748257637023926, "learning_rate": 2.504184288842304e-08, "loss": 0.9294, "step": 12571 }, { "epoch": 0.9691643539932162, "grad_norm": 3.5963714122772217, "learning_rate": 2.4917205358735986e-08, "loss": 0.9087, "step": 12572 }, { "epoch": 0.9692414431082331, "grad_norm": 3.668978691101074, "learning_rate": 2.4792877999694764e-08, "loss": 0.8818, "step": 12573 }, { "epoch": 0.96931853222325, "grad_norm": 3.8249216079711914, "learning_rate": 2.46688608190504e-08, "loss": 0.9889, "step": 12574 }, { "epoch": 0.9693956213382671, "grad_norm": 3.7434964179992676, "learning_rate": 2.4545153824534483e-08, "loss": 0.906, "step": 12575 }, { "epoch": 0.969472710453284, "grad_norm": 3.613534450531006, "learning_rate": 2.4421757023859737e-08, "loss": 0.8281, "step": 12576 }, { "epoch": 0.969549799568301, "grad_norm": 3.6317696571350098, "learning_rate": 2.4298670424718895e-08, "loss": 0.8908, "step": 12577 }, { "epoch": 0.9696268886833179, "grad_norm": 3.982708215713501, "learning_rate": 2.417589403478693e-08, "loss": 0.857, "step": 12578 }, { "epoch": 0.9697039777983348, "grad_norm": 3.8874056339263916, "learning_rate": 2.4053427861716605e-08, "loss": 0.9243, "step": 12579 }, { "epoch": 0.9697810669133519, "grad_norm": 3.79288649559021, "learning_rate": 2.3931271913144595e-08, "loss": 0.9047, "step": 12580 }, { "epoch": 0.9698581560283688, "grad_norm": 3.9113500118255615, "learning_rate": 2.3809426196685913e-08, "loss": 1.0422, "step": 12581 }, { "epoch": 0.9699352451433858, "grad_norm": 3.4592325687408447, "learning_rate": 2.3687890719937266e-08, "loss": 0.7883, "step": 12582 }, { "epoch": 0.9700123342584027, "grad_norm": 4.037449836730957, "learning_rate": 2.3566665490475372e-08, "loss": 0.9285, "step": 12583 }, { "epoch": 0.9700894233734196, "grad_norm": 3.595924139022827, "learning_rate": 2.3445750515858067e-08, "loss": 0.8422, "step": 12584 }, { "epoch": 0.9701665124884367, "grad_norm": 3.691532611846924, "learning_rate": 2.3325145803624327e-08, "loss": 0.9454, "step": 12585 }, { "epoch": 0.9702436016034536, "grad_norm": 3.8150765895843506, "learning_rate": 2.3204851361293136e-08, "loss": 0.944, "step": 12586 }, { "epoch": 0.9703206907184706, "grad_norm": 4.036136627197266, "learning_rate": 2.308486719636349e-08, "loss": 0.9124, "step": 12587 }, { "epoch": 0.9703977798334875, "grad_norm": 3.491523504257202, "learning_rate": 2.2965193316316635e-08, "loss": 0.9327, "step": 12588 }, { "epoch": 0.9704748689485044, "grad_norm": 4.169717788696289, "learning_rate": 2.2845829728613268e-08, "loss": 0.8412, "step": 12589 }, { "epoch": 0.9705519580635215, 
"grad_norm": 3.619222640991211, "learning_rate": 2.2726776440694653e-08, "loss": 0.8712, "step": 12590 }, { "epoch": 0.9706290471785384, "grad_norm": 3.93977427482605, "learning_rate": 2.2608033459983747e-08, "loss": 0.9405, "step": 12591 }, { "epoch": 0.9707061362935554, "grad_norm": 3.866957187652588, "learning_rate": 2.2489600793883516e-08, "loss": 0.9402, "step": 12592 }, { "epoch": 0.9707832254085723, "grad_norm": 3.519181966781616, "learning_rate": 2.2371478449777495e-08, "loss": 0.8973, "step": 12593 }, { "epoch": 0.9708603145235892, "grad_norm": 3.5281713008880615, "learning_rate": 2.2253666435029797e-08, "loss": 0.839, "step": 12594 }, { "epoch": 0.9709374036386063, "grad_norm": 3.604396343231201, "learning_rate": 2.2136164756985655e-08, "loss": 0.8367, "step": 12595 }, { "epoch": 0.9710144927536232, "grad_norm": 3.734344720840454, "learning_rate": 2.201897342297088e-08, "loss": 0.9964, "step": 12596 }, { "epoch": 0.9710915818686402, "grad_norm": 4.021172523498535, "learning_rate": 2.190209244029129e-08, "loss": 0.7933, "step": 12597 }, { "epoch": 0.9711686709836571, "grad_norm": 3.9066355228424072, "learning_rate": 2.1785521816233834e-08, "loss": 1.0668, "step": 12598 }, { "epoch": 0.971245760098674, "grad_norm": 4.022904396057129, "learning_rate": 2.166926155806659e-08, "loss": 0.9402, "step": 12599 }, { "epoch": 0.971322849213691, "grad_norm": 3.6551458835601807, "learning_rate": 2.155331167303709e-08, "loss": 0.8959, "step": 12600 }, { "epoch": 0.971399938328708, "grad_norm": 3.6164748668670654, "learning_rate": 2.1437672168374557e-08, "loss": 0.8252, "step": 12601 }, { "epoch": 0.971477027443725, "grad_norm": 3.7705652713775635, "learning_rate": 2.1322343051289328e-08, "loss": 0.976, "step": 12602 }, { "epoch": 0.9715541165587419, "grad_norm": 3.6110386848449707, "learning_rate": 2.1207324328970103e-08, "loss": 0.925, "step": 12603 }, { "epoch": 0.9716312056737588, "grad_norm": 3.571214199066162, "learning_rate": 2.1092616008588364e-08, "loss": 0.9206, "step": 12604 }, { "epoch": 0.9717082947887759, "grad_norm": 3.6576857566833496, "learning_rate": 2.0978218097295612e-08, "loss": 0.8903, "step": 12605 }, { "epoch": 0.9717853839037928, "grad_norm": 3.6607632637023926, "learning_rate": 2.086413060222392e-08, "loss": 0.8791, "step": 12606 }, { "epoch": 0.9718624730188098, "grad_norm": 3.758730888366699, "learning_rate": 2.0750353530485932e-08, "loss": 0.8838, "step": 12607 }, { "epoch": 0.9719395621338267, "grad_norm": 4.265690326690674, "learning_rate": 2.0636886889175978e-08, "loss": 0.9786, "step": 12608 }, { "epoch": 0.9720166512488436, "grad_norm": 3.9939539432525635, "learning_rate": 2.0523730685366726e-08, "loss": 0.9212, "step": 12609 }, { "epoch": 0.9720937403638606, "grad_norm": 3.4965243339538574, "learning_rate": 2.0410884926113094e-08, "loss": 0.9082, "step": 12610 }, { "epoch": 0.9721708294788776, "grad_norm": 4.189631938934326, "learning_rate": 2.0298349618451673e-08, "loss": 0.9899, "step": 12611 }, { "epoch": 0.9722479185938946, "grad_norm": 3.7480947971343994, "learning_rate": 2.0186124769396855e-08, "loss": 0.9383, "step": 12612 }, { "epoch": 0.9723250077089115, "grad_norm": 3.6697635650634766, "learning_rate": 2.0074210385946925e-08, "loss": 0.9815, "step": 12613 }, { "epoch": 0.9724020968239284, "grad_norm": 3.7893688678741455, "learning_rate": 1.996260647507797e-08, "loss": 1.0289, "step": 12614 }, { "epoch": 0.9724791859389454, "grad_norm": 3.7555932998657227, "learning_rate": 1.9851313043747767e-08, "loss": 0.8935, "step": 12615 }, { "epoch": 
0.9725562750539624, "grad_norm": 3.678717851638794, "learning_rate": 1.9740330098895755e-08, "loss": 0.8674, "step": 12616 }, { "epoch": 0.9726333641689794, "grad_norm": 3.5781774520874023, "learning_rate": 1.962965764744029e-08, "loss": 0.9078, "step": 12617 }, { "epoch": 0.9727104532839963, "grad_norm": 3.7126963138580322, "learning_rate": 1.951929569628197e-08, "loss": 0.8724, "step": 12618 }, { "epoch": 0.9727875423990132, "grad_norm": 3.61206316947937, "learning_rate": 1.9409244252301396e-08, "loss": 0.788, "step": 12619 }, { "epoch": 0.9728646315140302, "grad_norm": 3.783534049987793, "learning_rate": 1.929950332235864e-08, "loss": 0.9197, "step": 12620 }, { "epoch": 0.9729417206290472, "grad_norm": 3.796304225921631, "learning_rate": 1.9190072913296555e-08, "loss": 0.8634, "step": 12621 }, { "epoch": 0.9730188097440642, "grad_norm": 4.072221755981445, "learning_rate": 1.908095303193691e-08, "loss": 0.9239, "step": 12622 }, { "epoch": 0.9730958988590811, "grad_norm": 4.112748146057129, "learning_rate": 1.8972143685083155e-08, "loss": 0.9495, "step": 12623 }, { "epoch": 0.973172987974098, "grad_norm": 3.8566768169403076, "learning_rate": 1.886364487951875e-08, "loss": 0.9331, "step": 12624 }, { "epoch": 0.973250077089115, "grad_norm": 3.7345011234283447, "learning_rate": 1.8755456622008283e-08, "loss": 0.8506, "step": 12625 }, { "epoch": 0.973327166204132, "grad_norm": 4.711991310119629, "learning_rate": 1.864757891929636e-08, "loss": 0.934, "step": 12626 }, { "epoch": 0.973404255319149, "grad_norm": 4.006261825561523, "learning_rate": 1.8540011778108714e-08, "loss": 1.0348, "step": 12627 }, { "epoch": 0.9734813444341659, "grad_norm": 3.980159282684326, "learning_rate": 1.84327552051522e-08, "loss": 0.8444, "step": 12628 }, { "epoch": 0.9735584335491828, "grad_norm": 3.6981513500213623, "learning_rate": 1.8325809207112576e-08, "loss": 0.9964, "step": 12629 }, { "epoch": 0.9736355226641998, "grad_norm": 3.5920050144195557, "learning_rate": 1.8219173790658406e-08, "loss": 0.8259, "step": 12630 }, { "epoch": 0.9737126117792168, "grad_norm": 3.436185836791992, "learning_rate": 1.8112848962437146e-08, "loss": 0.8754, "step": 12631 }, { "epoch": 0.9737897008942338, "grad_norm": 3.517681360244751, "learning_rate": 1.800683472907794e-08, "loss": 0.8284, "step": 12632 }, { "epoch": 0.9738667900092507, "grad_norm": 3.979491710662842, "learning_rate": 1.7901131097190494e-08, "loss": 0.9857, "step": 12633 }, { "epoch": 0.9739438791242676, "grad_norm": 3.7884466648101807, "learning_rate": 1.7795738073364543e-08, "loss": 0.8687, "step": 12634 }, { "epoch": 0.9740209682392846, "grad_norm": 3.576528787612915, "learning_rate": 1.7690655664170388e-08, "loss": 0.7963, "step": 12635 }, { "epoch": 0.9740980573543015, "grad_norm": 4.090680122375488, "learning_rate": 1.7585883876160003e-08, "loss": 0.8982, "step": 12636 }, { "epoch": 0.9741751464693186, "grad_norm": 3.728907346725464, "learning_rate": 1.7481422715865394e-08, "loss": 0.8825, "step": 12637 }, { "epoch": 0.9742522355843355, "grad_norm": 3.741525411605835, "learning_rate": 1.737727218979912e-08, "loss": 0.9252, "step": 12638 }, { "epoch": 0.9743293246993524, "grad_norm": 3.8002474308013916, "learning_rate": 1.7273432304453774e-08, "loss": 0.7882, "step": 12639 }, { "epoch": 0.9744064138143694, "grad_norm": 3.745436191558838, "learning_rate": 1.7169903066303618e-08, "loss": 0.9055, "step": 12640 }, { "epoch": 0.9744835029293863, "grad_norm": 3.9796302318573, "learning_rate": 1.7066684481803486e-08, "loss": 0.9246, "step": 12641 }, { "epoch": 
0.9745605920444034, "grad_norm": 3.8389484882354736, "learning_rate": 1.6963776557388235e-08, "loss": 0.82, "step": 12642 }, { "epoch": 0.9746376811594203, "grad_norm": 3.872706651687622, "learning_rate": 1.686117929947384e-08, "loss": 0.9159, "step": 12643 }, { "epoch": 0.9747147702744372, "grad_norm": 4.000880241394043, "learning_rate": 1.6758892714456853e-08, "loss": 0.8, "step": 12644 }, { "epoch": 0.9747918593894542, "grad_norm": 3.820415735244751, "learning_rate": 1.665691680871384e-08, "loss": 0.9869, "step": 12645 }, { "epoch": 0.9748689485044711, "grad_norm": 3.617678165435791, "learning_rate": 1.6555251588602496e-08, "loss": 0.9262, "step": 12646 }, { "epoch": 0.9749460376194882, "grad_norm": 3.6912779808044434, "learning_rate": 1.6453897060461076e-08, "loss": 0.9492, "step": 12647 }, { "epoch": 0.9750231267345051, "grad_norm": 3.9584383964538574, "learning_rate": 1.6352853230609534e-08, "loss": 0.972, "step": 12648 }, { "epoch": 0.975100215849522, "grad_norm": 3.8923022747039795, "learning_rate": 1.6252120105345604e-08, "loss": 0.993, "step": 12649 }, { "epoch": 0.975177304964539, "grad_norm": 3.5396828651428223, "learning_rate": 1.6151697690951484e-08, "loss": 0.83, "step": 12650 }, { "epoch": 0.9752543940795559, "grad_norm": 3.774773359298706, "learning_rate": 1.6051585993686614e-08, "loss": 0.8335, "step": 12651 }, { "epoch": 0.975331483194573, "grad_norm": 3.4751946926116943, "learning_rate": 1.5951785019792666e-08, "loss": 0.8478, "step": 12652 }, { "epoch": 0.9754085723095899, "grad_norm": 3.7243905067443848, "learning_rate": 1.5852294775491882e-08, "loss": 0.8084, "step": 12653 }, { "epoch": 0.9754856614246068, "grad_norm": 3.5833497047424316, "learning_rate": 1.575311526698653e-08, "loss": 0.7971, "step": 12654 }, { "epoch": 0.9755627505396238, "grad_norm": 3.848736524581909, "learning_rate": 1.56542465004611e-08, "loss": 0.9441, "step": 12655 }, { "epoch": 0.9756398396546407, "grad_norm": 3.8537116050720215, "learning_rate": 1.5555688482078446e-08, "loss": 0.8782, "step": 12656 }, { "epoch": 0.9757169287696578, "grad_norm": 3.7755062580108643, "learning_rate": 1.545744121798365e-08, "loss": 0.823, "step": 12657 }, { "epoch": 0.9757940178846747, "grad_norm": 3.62941837310791, "learning_rate": 1.535950471430181e-08, "loss": 0.8467, "step": 12658 }, { "epoch": 0.9758711069996916, "grad_norm": 3.677624464035034, "learning_rate": 1.52618789771386e-08, "loss": 0.9067, "step": 12659 }, { "epoch": 0.9759481961147086, "grad_norm": 3.693296432495117, "learning_rate": 1.516456401258082e-08, "loss": 0.8758, "step": 12660 }, { "epoch": 0.9760252852297255, "grad_norm": 4.068735122680664, "learning_rate": 1.5067559826695277e-08, "loss": 1.0129, "step": 12661 }, { "epoch": 0.9761023743447426, "grad_norm": 3.9047787189483643, "learning_rate": 1.4970866425529916e-08, "loss": 0.8848, "step": 12662 }, { "epoch": 0.9761794634597595, "grad_norm": 3.9053268432617188, "learning_rate": 1.4874483815112694e-08, "loss": 0.9414, "step": 12663 }, { "epoch": 0.9762565525747764, "grad_norm": 3.717155933380127, "learning_rate": 1.477841200145269e-08, "loss": 0.8827, "step": 12664 }, { "epoch": 0.9763336416897934, "grad_norm": 3.714543581008911, "learning_rate": 1.468265099053956e-08, "loss": 0.9911, "step": 12665 }, { "epoch": 0.9764107308048103, "grad_norm": 3.9254050254821777, "learning_rate": 1.4587200788343524e-08, "loss": 0.9414, "step": 12666 }, { "epoch": 0.9764878199198274, "grad_norm": 3.731003999710083, "learning_rate": 1.4492061400815383e-08, "loss": 0.8444, "step": 12667 }, { "epoch": 
0.9765649090348443, "grad_norm": 3.458054542541504, "learning_rate": 1.4397232833887053e-08, "loss": 0.717, "step": 12668 }, { "epoch": 0.9766419981498612, "grad_norm": 4.42410945892334, "learning_rate": 1.4302715093469365e-08, "loss": 0.8526, "step": 12669 }, { "epoch": 0.9767190872648782, "grad_norm": 3.618612289428711, "learning_rate": 1.4208508185456493e-08, "loss": 0.9025, "step": 12670 }, { "epoch": 0.9767961763798951, "grad_norm": 3.267320156097412, "learning_rate": 1.4114612115720961e-08, "loss": 0.8451, "step": 12671 }, { "epoch": 0.9768732654949122, "grad_norm": 3.8861756324768066, "learning_rate": 1.4021026890116418e-08, "loss": 0.9779, "step": 12672 }, { "epoch": 0.9769503546099291, "grad_norm": 3.7133381366729736, "learning_rate": 1.3927752514478198e-08, "loss": 0.9188, "step": 12673 }, { "epoch": 0.977027443724946, "grad_norm": 4.010810375213623, "learning_rate": 1.383478899462054e-08, "loss": 0.9097, "step": 12674 }, { "epoch": 0.977104532839963, "grad_norm": 4.118958473205566, "learning_rate": 1.3742136336340473e-08, "loss": 0.9476, "step": 12675 }, { "epoch": 0.9771816219549799, "grad_norm": 4.091235160827637, "learning_rate": 1.3649794545413376e-08, "loss": 0.9998, "step": 12676 }, { "epoch": 0.977258711069997, "grad_norm": 3.626465082168579, "learning_rate": 1.3557763627596309e-08, "loss": 0.9915, "step": 12677 }, { "epoch": 0.9773358001850139, "grad_norm": 3.511397361755371, "learning_rate": 1.3466043588628019e-08, "loss": 0.9487, "step": 12678 }, { "epoch": 0.9774128893000308, "grad_norm": 3.758230447769165, "learning_rate": 1.3374634434225043e-08, "loss": 0.9622, "step": 12679 }, { "epoch": 0.9774899784150478, "grad_norm": 4.067485332489014, "learning_rate": 1.3283536170087818e-08, "loss": 1.0281, "step": 12680 }, { "epoch": 0.9775670675300647, "grad_norm": 3.6686248779296875, "learning_rate": 1.3192748801895139e-08, "loss": 0.86, "step": 12681 }, { "epoch": 0.9776441566450818, "grad_norm": 3.7029106616973877, "learning_rate": 1.3102272335307476e-08, "loss": 0.7519, "step": 12682 }, { "epoch": 0.9777212457600987, "grad_norm": 3.997795343399048, "learning_rate": 1.301210677596476e-08, "loss": 0.8996, "step": 12683 }, { "epoch": 0.9777983348751156, "grad_norm": 3.61710262298584, "learning_rate": 1.2922252129489165e-08, "loss": 0.9548, "step": 12684 }, { "epoch": 0.9778754239901326, "grad_norm": 3.6802361011505127, "learning_rate": 1.2832708401482319e-08, "loss": 0.8405, "step": 12685 }, { "epoch": 0.9779525131051495, "grad_norm": 3.580125331878662, "learning_rate": 1.2743475597526978e-08, "loss": 0.8522, "step": 12686 }, { "epoch": 0.9780296022201665, "grad_norm": 3.6615421772003174, "learning_rate": 1.265455372318647e-08, "loss": 0.9192, "step": 12687 }, { "epoch": 0.9781066913351835, "grad_norm": 3.9896399974823, "learning_rate": 1.2565942784004692e-08, "loss": 0.9484, "step": 12688 }, { "epoch": 0.9781837804502004, "grad_norm": 4.462338447570801, "learning_rate": 1.2477642785505006e-08, "loss": 0.9435, "step": 12689 }, { "epoch": 0.9782608695652174, "grad_norm": 3.643880605697632, "learning_rate": 1.2389653733193563e-08, "loss": 0.8626, "step": 12690 }, { "epoch": 0.9783379586802343, "grad_norm": 3.689103841781616, "learning_rate": 1.2301975632555973e-08, "loss": 0.826, "step": 12691 }, { "epoch": 0.9784150477952513, "grad_norm": 3.740290880203247, "learning_rate": 1.2214608489057866e-08, "loss": 0.8738, "step": 12692 }, { "epoch": 0.9784921369102683, "grad_norm": 3.896019697189331, "learning_rate": 1.2127552308147106e-08, "loss": 0.9251, "step": 12693 }, { "epoch": 
0.9785692260252852, "grad_norm": 3.6664927005767822, "learning_rate": 1.2040807095249907e-08, "loss": 0.9496, "step": 12694 }, { "epoch": 0.9786463151403022, "grad_norm": 3.8327767848968506, "learning_rate": 1.1954372855775275e-08, "loss": 0.991, "step": 12695 }, { "epoch": 0.9787234042553191, "grad_norm": 3.853940486907959, "learning_rate": 1.1868249595111681e-08, "loss": 0.8489, "step": 12696 }, { "epoch": 0.9788004933703361, "grad_norm": 3.322674512863159, "learning_rate": 1.1782437318628714e-08, "loss": 0.8567, "step": 12697 }, { "epoch": 0.9788775824853531, "grad_norm": 3.6797235012054443, "learning_rate": 1.1696936031676542e-08, "loss": 0.8655, "step": 12698 }, { "epoch": 0.97895467160037, "grad_norm": 4.247450351715088, "learning_rate": 1.161174573958479e-08, "loss": 1.0226, "step": 12699 }, { "epoch": 0.979031760715387, "grad_norm": 3.664731979370117, "learning_rate": 1.152686644766532e-08, "loss": 0.8835, "step": 12700 }, { "epoch": 0.9791088498304039, "grad_norm": 3.965914249420166, "learning_rate": 1.1442298161209453e-08, "loss": 0.8815, "step": 12701 }, { "epoch": 0.9791859389454209, "grad_norm": 4.060098648071289, "learning_rate": 1.1358040885490196e-08, "loss": 0.8988, "step": 12702 }, { "epoch": 0.9792630280604379, "grad_norm": 3.575415849685669, "learning_rate": 1.1274094625760013e-08, "loss": 0.8422, "step": 12703 }, { "epoch": 0.9793401171754548, "grad_norm": 3.5373342037200928, "learning_rate": 1.119045938725305e-08, "loss": 1.0148, "step": 12704 }, { "epoch": 0.9794172062904718, "grad_norm": 3.689316511154175, "learning_rate": 1.110713517518347e-08, "loss": 0.9632, "step": 12705 }, { "epoch": 0.9794942954054887, "grad_norm": 3.855285882949829, "learning_rate": 1.1024121994745452e-08, "loss": 0.929, "step": 12706 }, { "epoch": 0.9795713845205057, "grad_norm": 3.655165910720825, "learning_rate": 1.0941419851114853e-08, "loss": 0.9428, "step": 12707 }, { "epoch": 0.9796484736355227, "grad_norm": 3.5580670833587646, "learning_rate": 1.0859028749447552e-08, "loss": 0.8268, "step": 12708 }, { "epoch": 0.9797255627505396, "grad_norm": 3.758411407470703, "learning_rate": 1.0776948694881107e-08, "loss": 0.9141, "step": 12709 }, { "epoch": 0.9798026518655566, "grad_norm": 3.422999858856201, "learning_rate": 1.0695179692531421e-08, "loss": 0.8323, "step": 12710 }, { "epoch": 0.9798797409805735, "grad_norm": 3.754002809524536, "learning_rate": 1.0613721747497196e-08, "loss": 0.942, "step": 12711 }, { "epoch": 0.9799568300955905, "grad_norm": 3.7836527824401855, "learning_rate": 1.0532574864856592e-08, "loss": 0.9146, "step": 12712 }, { "epoch": 0.9800339192106075, "grad_norm": 3.721501350402832, "learning_rate": 1.0451739049668896e-08, "loss": 0.9326, "step": 12713 }, { "epoch": 0.9801110083256244, "grad_norm": 3.7368814945220947, "learning_rate": 1.0371214306973964e-08, "loss": 0.9373, "step": 12714 }, { "epoch": 0.9801880974406414, "grad_norm": 3.6165754795074463, "learning_rate": 1.029100064179167e-08, "loss": 0.9012, "step": 12715 }, { "epoch": 0.9802651865556583, "grad_norm": 3.814692497253418, "learning_rate": 1.0211098059123015e-08, "loss": 0.8587, "step": 12716 }, { "epoch": 0.9803422756706753, "grad_norm": 3.516226053237915, "learning_rate": 1.0131506563950121e-08, "loss": 0.9172, "step": 12717 }, { "epoch": 0.9804193647856922, "grad_norm": 3.7366223335266113, "learning_rate": 1.0052226161234024e-08, "loss": 0.8711, "step": 12718 }, { "epoch": 0.9804964539007093, "grad_norm": 3.5088720321655273, "learning_rate": 9.97325685591799e-09, "loss": 0.8817, "step": 12719 }, { 
"epoch": 0.9805735430157262, "grad_norm": 3.6304702758789062, "learning_rate": 9.894598652925857e-09, "loss": 0.9045, "step": 12720 }, { "epoch": 0.9806506321307431, "grad_norm": 3.697366952896118, "learning_rate": 9.816251557160927e-09, "loss": 0.9645, "step": 12721 }, { "epoch": 0.9807277212457601, "grad_norm": 3.8746864795684814, "learning_rate": 9.738215573507625e-09, "loss": 0.9756, "step": 12722 }, { "epoch": 0.980804810360777, "grad_norm": 3.758707046508789, "learning_rate": 9.660490706831505e-09, "loss": 0.8504, "step": 12723 }, { "epoch": 0.9808818994757941, "grad_norm": 3.641058921813965, "learning_rate": 9.583076961978132e-09, "loss": 0.7918, "step": 12724 }, { "epoch": 0.980958988590811, "grad_norm": 3.7192978858947754, "learning_rate": 9.505974343774205e-09, "loss": 0.9248, "step": 12725 }, { "epoch": 0.9810360777058279, "grad_norm": 3.7372353076934814, "learning_rate": 9.429182857025876e-09, "loss": 0.8648, "step": 12726 }, { "epoch": 0.9811131668208449, "grad_norm": 3.754185199737549, "learning_rate": 9.352702506521539e-09, "loss": 0.8707, "step": 12727 }, { "epoch": 0.9811902559358618, "grad_norm": 3.9802560806274414, "learning_rate": 9.276533297028489e-09, "loss": 0.9176, "step": 12728 }, { "epoch": 0.9812673450508789, "grad_norm": 3.81831431388855, "learning_rate": 9.200675233296263e-09, "loss": 0.8515, "step": 12729 }, { "epoch": 0.9813444341658958, "grad_norm": 3.8928399085998535, "learning_rate": 9.125128320053856e-09, "loss": 0.9243, "step": 12730 }, { "epoch": 0.9814215232809127, "grad_norm": 3.8177058696746826, "learning_rate": 9.049892562011387e-09, "loss": 0.908, "step": 12731 }, { "epoch": 0.9814986123959297, "grad_norm": 3.94755220413208, "learning_rate": 8.974967963858994e-09, "loss": 0.906, "step": 12732 }, { "epoch": 0.9815757015109466, "grad_norm": 3.647578477859497, "learning_rate": 8.900354530268497e-09, "loss": 0.8664, "step": 12733 }, { "epoch": 0.9816527906259637, "grad_norm": 3.8050804138183594, "learning_rate": 8.826052265891172e-09, "loss": 0.9537, "step": 12734 }, { "epoch": 0.9817298797409806, "grad_norm": 3.629077672958374, "learning_rate": 8.752061175359983e-09, "loss": 0.8645, "step": 12735 }, { "epoch": 0.9818069688559975, "grad_norm": 3.9921088218688965, "learning_rate": 8.67838126328735e-09, "loss": 0.98, "step": 12736 }, { "epoch": 0.9818840579710145, "grad_norm": 3.7594244480133057, "learning_rate": 8.605012534266265e-09, "loss": 0.8998, "step": 12737 }, { "epoch": 0.9819611470860314, "grad_norm": 3.63808536529541, "learning_rate": 8.53195499287196e-09, "loss": 0.9154, "step": 12738 }, { "epoch": 0.9820382362010485, "grad_norm": 3.701803207397461, "learning_rate": 8.459208643659122e-09, "loss": 0.8308, "step": 12739 }, { "epoch": 0.9821153253160654, "grad_norm": 3.5713162422180176, "learning_rate": 8.386773491162458e-09, "loss": 0.9392, "step": 12740 }, { "epoch": 0.9821924144310823, "grad_norm": 3.8430564403533936, "learning_rate": 8.314649539898357e-09, "loss": 0.8847, "step": 12741 }, { "epoch": 0.9822695035460993, "grad_norm": 3.8548264503479004, "learning_rate": 8.242836794362662e-09, "loss": 0.9908, "step": 12742 }, { "epoch": 0.9823465926611162, "grad_norm": 4.117403984069824, "learning_rate": 8.17133525903402e-09, "loss": 0.8986, "step": 12743 }, { "epoch": 0.9824236817761333, "grad_norm": 3.5124740600585938, "learning_rate": 8.100144938368304e-09, "loss": 0.9653, "step": 12744 }, { "epoch": 0.9825007708911502, "grad_norm": 3.7108638286590576, "learning_rate": 8.029265836805855e-09, "loss": 0.8482, "step": 12745 }, { "epoch": 
0.9825778600061671, "grad_norm": 3.5516583919525146, "learning_rate": 7.958697958763695e-09, "loss": 0.9324, "step": 12746 }, { "epoch": 0.9826549491211841, "grad_norm": 4.330380439758301, "learning_rate": 7.888441308642746e-09, "loss": 0.9597, "step": 12747 }, { "epoch": 0.982732038236201, "grad_norm": 3.6358656883239746, "learning_rate": 7.818495890822842e-09, "loss": 0.8769, "step": 12748 }, { "epoch": 0.9828091273512181, "grad_norm": 3.5097153186798096, "learning_rate": 7.748861709664379e-09, "loss": 0.8299, "step": 12749 }, { "epoch": 0.982886216466235, "grad_norm": 4.351414203643799, "learning_rate": 7.679538769508887e-09, "loss": 0.9594, "step": 12750 }, { "epoch": 0.9829633055812519, "grad_norm": 4.271960735321045, "learning_rate": 7.61052707467791e-09, "loss": 0.8921, "step": 12751 }, { "epoch": 0.9830403946962689, "grad_norm": 3.7134621143341064, "learning_rate": 7.541826629474669e-09, "loss": 0.8811, "step": 12752 }, { "epoch": 0.9831174838112858, "grad_norm": 3.69516921043396, "learning_rate": 7.473437438181852e-09, "loss": 0.9587, "step": 12753 }, { "epoch": 0.9831945729263029, "grad_norm": 3.7787835597991943, "learning_rate": 7.40535950506327e-09, "loss": 0.9491, "step": 12754 }, { "epoch": 0.9832716620413198, "grad_norm": 4.08289909362793, "learning_rate": 7.3375928343633054e-09, "loss": 0.9131, "step": 12755 }, { "epoch": 0.9833487511563367, "grad_norm": 3.624502658843994, "learning_rate": 7.2701374303063565e-09, "loss": 0.9144, "step": 12756 }, { "epoch": 0.9834258402713537, "grad_norm": 3.4605467319488525, "learning_rate": 7.202993297099059e-09, "loss": 0.8354, "step": 12757 }, { "epoch": 0.9835029293863706, "grad_norm": 3.8539695739746094, "learning_rate": 7.136160438925843e-09, "loss": 0.8254, "step": 12758 }, { "epoch": 0.9835800185013877, "grad_norm": 3.464372396469116, "learning_rate": 7.069638859955041e-09, "loss": 0.8553, "step": 12759 }, { "epoch": 0.9836571076164046, "grad_norm": 3.6082799434661865, "learning_rate": 7.003428564332782e-09, "loss": 0.9381, "step": 12760 }, { "epoch": 0.9837341967314215, "grad_norm": 3.6667985916137695, "learning_rate": 6.937529556187428e-09, "loss": 0.962, "step": 12761 }, { "epoch": 0.9838112858464385, "grad_norm": 3.6042168140411377, "learning_rate": 6.871941839627916e-09, "loss": 0.8476, "step": 12762 }, { "epoch": 0.9838883749614554, "grad_norm": 3.740255832672119, "learning_rate": 6.8066654187420865e-09, "loss": 0.9196, "step": 12763 }, { "epoch": 0.9839654640764725, "grad_norm": 3.6301140785217285, "learning_rate": 6.741700297600573e-09, "loss": 0.7799, "step": 12764 }, { "epoch": 0.9840425531914894, "grad_norm": 3.6463210582733154, "learning_rate": 6.677046480252913e-09, "loss": 0.8718, "step": 12765 }, { "epoch": 0.9841196423065063, "grad_norm": 4.018143653869629, "learning_rate": 6.6127039707308826e-09, "loss": 1.0302, "step": 12766 }, { "epoch": 0.9841967314215233, "grad_norm": 3.6346871852874756, "learning_rate": 6.548672773045161e-09, "loss": 0.9883, "step": 12767 }, { "epoch": 0.9842738205365402, "grad_norm": 3.8458900451660156, "learning_rate": 6.4849528911881125e-09, "loss": 0.8449, "step": 12768 }, { "epoch": 0.9843509096515572, "grad_norm": 3.548745632171631, "learning_rate": 6.421544329131557e-09, "loss": 0.738, "step": 12769 }, { "epoch": 0.9844279987665742, "grad_norm": 3.7064075469970703, "learning_rate": 6.358447090829556e-09, "loss": 0.8474, "step": 12770 }, { "epoch": 0.9845050878815911, "grad_norm": 3.6911072731018066, "learning_rate": 6.295661180216184e-09, "loss": 0.911, "step": 12771 }, { "epoch": 
0.9845821769966081, "grad_norm": 3.657993793487549, "learning_rate": 6.2331866012044215e-09, "loss": 0.9808, "step": 12772 }, { "epoch": 0.984659266111625, "grad_norm": 3.437884569168091, "learning_rate": 6.171023357690598e-09, "loss": 0.8563, "step": 12773 }, { "epoch": 0.984736355226642, "grad_norm": 3.694701910018921, "learning_rate": 6.109171453549944e-09, "loss": 0.798, "step": 12774 }, { "epoch": 0.984813444341659, "grad_norm": 3.68932843208313, "learning_rate": 6.0476308926377125e-09, "loss": 0.8745, "step": 12775 }, { "epoch": 0.9848905334566759, "grad_norm": 3.8350744247436523, "learning_rate": 5.986401678791942e-09, "loss": 0.9399, "step": 12776 }, { "epoch": 0.9849676225716929, "grad_norm": 4.023548126220703, "learning_rate": 5.9254838158295805e-09, "loss": 0.835, "step": 12777 }, { "epoch": 0.9850447116867098, "grad_norm": 3.844338893890381, "learning_rate": 5.864877307547589e-09, "loss": 1.0251, "step": 12778 }, { "epoch": 0.9851218008017268, "grad_norm": 3.5062670707702637, "learning_rate": 5.804582157725724e-09, "loss": 0.8741, "step": 12779 }, { "epoch": 0.9851988899167438, "grad_norm": 3.6520488262176514, "learning_rate": 5.744598370122645e-09, "loss": 0.9983, "step": 12780 }, { "epoch": 0.9852759790317607, "grad_norm": 3.605093479156494, "learning_rate": 5.684925948477582e-09, "loss": 0.8435, "step": 12781 }, { "epoch": 0.9853530681467777, "grad_norm": 3.9544193744659424, "learning_rate": 5.625564896511448e-09, "loss": 0.899, "step": 12782 }, { "epoch": 0.9854301572617946, "grad_norm": 3.8571465015411377, "learning_rate": 5.566515217924617e-09, "loss": 0.8574, "step": 12783 }, { "epoch": 0.9855072463768116, "grad_norm": 3.823538303375244, "learning_rate": 5.507776916398588e-09, "loss": 0.9406, "step": 12784 }, { "epoch": 0.9855843354918286, "grad_norm": 4.045183181762695, "learning_rate": 5.4493499955959874e-09, "loss": 0.804, "step": 12785 }, { "epoch": 0.9856614246068455, "grad_norm": 3.724013090133667, "learning_rate": 5.391234459158901e-09, "loss": 0.9158, "step": 12786 }, { "epoch": 0.9857385137218625, "grad_norm": 3.712146759033203, "learning_rate": 5.333430310709986e-09, "loss": 1.0271, "step": 12787 }, { "epoch": 0.9858156028368794, "grad_norm": 4.106431484222412, "learning_rate": 5.2759375538541376e-09, "loss": 0.9228, "step": 12788 }, { "epoch": 0.9858926919518964, "grad_norm": 3.821424961090088, "learning_rate": 5.218756192175156e-09, "loss": 0.974, "step": 12789 }, { "epoch": 0.9859697810669134, "grad_norm": 3.7011265754699707, "learning_rate": 5.161886229238522e-09, "loss": 0.81, "step": 12790 }, { "epoch": 0.9860468701819303, "grad_norm": 3.595111608505249, "learning_rate": 5.105327668588622e-09, "loss": 0.9212, "step": 12791 }, { "epoch": 0.9861239592969473, "grad_norm": 3.862673759460449, "learning_rate": 5.049080513752636e-09, "loss": 0.9619, "step": 12792 }, { "epoch": 0.9862010484119642, "grad_norm": 3.584799289703369, "learning_rate": 4.993144768237201e-09, "loss": 0.9546, "step": 12793 }, { "epoch": 0.9862781375269812, "grad_norm": 3.9195289611816406, "learning_rate": 4.937520435528975e-09, "loss": 0.9763, "step": 12794 }, { "epoch": 0.9863552266419982, "grad_norm": 3.7915120124816895, "learning_rate": 4.8822075190962936e-09, "loss": 0.7143, "step": 12795 }, { "epoch": 0.9864323157570151, "grad_norm": 3.788917303085327, "learning_rate": 4.827206022388065e-09, "loss": 0.9824, "step": 12796 }, { "epoch": 0.9865094048720321, "grad_norm": 3.605607509613037, "learning_rate": 4.772515948832657e-09, "loss": 0.8145, "step": 12797 }, { "epoch": 
0.986586493987049, "grad_norm": 3.643545150756836, "learning_rate": 4.718137301839565e-09, "loss": 0.8363, "step": 12798 }, { "epoch": 0.986663583102066, "grad_norm": 4.066047668457031, "learning_rate": 4.66407008479941e-09, "loss": 0.9509, "step": 12799 }, { "epoch": 0.986740672217083, "grad_norm": 3.583989381790161, "learning_rate": 4.6103143010833854e-09, "loss": 0.8493, "step": 12800 }, { "epoch": 0.9868177613320999, "grad_norm": 3.6887717247009277, "learning_rate": 4.556869954042143e-09, "loss": 0.8542, "step": 12801 }, { "epoch": 0.9868948504471169, "grad_norm": 3.8332138061523438, "learning_rate": 4.5037370470085715e-09, "loss": 0.9282, "step": 12802 }, { "epoch": 0.9869719395621338, "grad_norm": 3.6815474033355713, "learning_rate": 4.450915583293913e-09, "loss": 0.8847, "step": 12803 }, { "epoch": 0.9870490286771508, "grad_norm": 3.505082607269287, "learning_rate": 4.398405566192754e-09, "loss": 0.7977, "step": 12804 }, { "epoch": 0.9871261177921677, "grad_norm": 3.825004816055298, "learning_rate": 4.346206998977476e-09, "loss": 1.0071, "step": 12805 }, { "epoch": 0.9872032069071847, "grad_norm": 4.440659999847412, "learning_rate": 4.294319884903253e-09, "loss": 1.0099, "step": 12806 }, { "epoch": 0.9872802960222017, "grad_norm": 3.277602195739746, "learning_rate": 4.24274422720472e-09, "loss": 0.8623, "step": 12807 }, { "epoch": 0.9873573851372186, "grad_norm": 3.6194324493408203, "learning_rate": 4.191480029097639e-09, "loss": 0.9843, "step": 12808 }, { "epoch": 0.9874344742522356, "grad_norm": 3.5237951278686523, "learning_rate": 4.140527293777785e-09, "loss": 0.7906, "step": 12809 }, { "epoch": 0.9875115633672525, "grad_norm": 4.282720565795898, "learning_rate": 4.089886024421508e-09, "loss": 0.9852, "step": 12810 }, { "epoch": 0.9875886524822695, "grad_norm": 3.8325626850128174, "learning_rate": 4.039556224186836e-09, "loss": 0.9432, "step": 12811 }, { "epoch": 0.9876657415972865, "grad_norm": 3.7518670558929443, "learning_rate": 3.989537896210704e-09, "loss": 0.8936, "step": 12812 }, { "epoch": 0.9877428307123034, "grad_norm": 3.3281097412109375, "learning_rate": 3.93983104361173e-09, "loss": 0.8703, "step": 12813 }, { "epoch": 0.9878199198273204, "grad_norm": 3.881019353866577, "learning_rate": 3.890435669489656e-09, "loss": 0.8354, "step": 12814 }, { "epoch": 0.9878970089423373, "grad_norm": 3.5037472248077393, "learning_rate": 3.84135177692313e-09, "loss": 0.9291, "step": 12815 }, { "epoch": 0.9879740980573543, "grad_norm": 3.938447952270508, "learning_rate": 3.792579368972482e-09, "loss": 0.9214, "step": 12816 }, { "epoch": 0.9880511871723713, "grad_norm": 4.022164344787598, "learning_rate": 3.744118448678058e-09, "loss": 0.8379, "step": 12817 }, { "epoch": 0.9881282762873882, "grad_norm": 3.696821451187134, "learning_rate": 3.6959690190618847e-09, "loss": 0.8365, "step": 12818 }, { "epoch": 0.9882053654024052, "grad_norm": 3.4798009395599365, "learning_rate": 3.6481310831260054e-09, "loss": 0.8654, "step": 12819 }, { "epoch": 0.9882824545174221, "grad_norm": 3.6241512298583984, "learning_rate": 3.600604643851924e-09, "loss": 0.8805, "step": 12820 }, { "epoch": 0.988359543632439, "grad_norm": 3.9195075035095215, "learning_rate": 3.5533897042033805e-09, "loss": 0.9811, "step": 12821 }, { "epoch": 0.9884366327474561, "grad_norm": 4.072207927703857, "learning_rate": 3.5064862671230217e-09, "loss": 0.9203, "step": 12822 }, { "epoch": 0.988513721862473, "grad_norm": 4.044565200805664, "learning_rate": 3.4598943355362845e-09, "loss": 0.9196, "step": 12823 }, { "epoch": 
0.98859081097749, "grad_norm": 3.762471914291382, "learning_rate": 3.4136139123475134e-09, "loss": 0.8305, "step": 12824 }, { "epoch": 0.9886679000925069, "grad_norm": 3.648581027984619, "learning_rate": 3.3676450004416215e-09, "loss": 0.8875, "step": 12825 }, { "epoch": 0.9887449892075239, "grad_norm": 3.582662343978882, "learning_rate": 3.321987602685206e-09, "loss": 0.8568, "step": 12826 }, { "epoch": 0.9888220783225409, "grad_norm": 3.838958978652954, "learning_rate": 3.276641721924323e-09, "loss": 0.9401, "step": 12827 }, { "epoch": 0.9888991674375578, "grad_norm": 3.704432964324951, "learning_rate": 3.2316073609856e-09, "loss": 0.8952, "step": 12828 }, { "epoch": 0.9889762565525748, "grad_norm": 3.5584044456481934, "learning_rate": 3.186884522677902e-09, "loss": 1.0251, "step": 12829 }, { "epoch": 0.9890533456675917, "grad_norm": 3.896476984024048, "learning_rate": 3.1424732097884434e-09, "loss": 0.888, "step": 12830 }, { "epoch": 0.9891304347826086, "grad_norm": 3.5131354331970215, "learning_rate": 3.098373425086676e-09, "loss": 0.8817, "step": 12831 }, { "epoch": 0.9892075238976257, "grad_norm": 3.6488070487976074, "learning_rate": 3.0545851713215113e-09, "loss": 0.8898, "step": 12832 }, { "epoch": 0.9892846130126426, "grad_norm": 3.468665361404419, "learning_rate": 3.011108451222988e-09, "loss": 0.8596, "step": 12833 }, { "epoch": 0.9893617021276596, "grad_norm": 3.568416118621826, "learning_rate": 2.9679432675017163e-09, "loss": 0.9725, "step": 12834 }, { "epoch": 0.9894387912426765, "grad_norm": 4.020820617675781, "learning_rate": 2.9250896228494307e-09, "loss": 0.8734, "step": 12835 }, { "epoch": 0.9895158803576934, "grad_norm": 3.5080959796905518, "learning_rate": 2.8825475199367735e-09, "loss": 0.8361, "step": 12836 }, { "epoch": 0.9895929694727105, "grad_norm": 3.717933416366577, "learning_rate": 2.8403169614166226e-09, "loss": 0.7573, "step": 12837 }, { "epoch": 0.9896700585877274, "grad_norm": 3.621506690979004, "learning_rate": 2.7983979499218715e-09, "loss": 0.903, "step": 12838 }, { "epoch": 0.9897471477027444, "grad_norm": 3.9174814224243164, "learning_rate": 2.756790488065986e-09, "loss": 0.8788, "step": 12839 }, { "epoch": 0.9898242368177613, "grad_norm": 4.379630088806152, "learning_rate": 2.7154945784424456e-09, "loss": 0.912, "step": 12840 }, { "epoch": 0.9899013259327782, "grad_norm": 3.8400206565856934, "learning_rate": 2.6745102236264142e-09, "loss": 0.9096, "step": 12841 }, { "epoch": 0.9899784150477953, "grad_norm": 3.97227144241333, "learning_rate": 2.633837426173069e-09, "loss": 0.8001, "step": 12842 }, { "epoch": 0.9900555041628122, "grad_norm": 3.7930757999420166, "learning_rate": 2.5934761886176052e-09, "loss": 0.886, "step": 12843 }, { "epoch": 0.9901325932778292, "grad_norm": 3.9105029106140137, "learning_rate": 2.5534265134768977e-09, "loss": 0.9109, "step": 12844 }, { "epoch": 0.9902096823928461, "grad_norm": 3.479025363922119, "learning_rate": 2.513688403247283e-09, "loss": 0.8513, "step": 12845 }, { "epoch": 0.990286771507863, "grad_norm": 4.080109119415283, "learning_rate": 2.474261860406779e-09, "loss": 0.8726, "step": 12846 }, { "epoch": 0.9903638606228801, "grad_norm": 3.6820366382598877, "learning_rate": 2.4351468874134197e-09, "loss": 0.8961, "step": 12847 }, { "epoch": 0.990440949737897, "grad_norm": 3.6713099479675293, "learning_rate": 2.396343486705255e-09, "loss": 0.9043, "step": 12848 }, { "epoch": 0.990518038852914, "grad_norm": 3.635523796081543, "learning_rate": 2.3578516607020154e-09, "loss": 0.8773, "step": 12849 }, { "epoch": 
0.9905951279679309, "grad_norm": 3.570716142654419, "learning_rate": 2.319671411802893e-09, "loss": 0.9127, "step": 12850 }, { "epoch": 0.9906722170829478, "grad_norm": 3.6702277660369873, "learning_rate": 2.281802742388761e-09, "loss": 0.852, "step": 12851 }, { "epoch": 0.9907493061979649, "grad_norm": 3.9197945594787598, "learning_rate": 2.2442456548205094e-09, "loss": 0.9476, "step": 12852 }, { "epoch": 0.9908263953129818, "grad_norm": 3.8219149112701416, "learning_rate": 2.207000151439598e-09, "loss": 0.9928, "step": 12853 }, { "epoch": 0.9909034844279988, "grad_norm": 3.7937533855438232, "learning_rate": 2.170066234568058e-09, "loss": 1.061, "step": 12854 }, { "epoch": 0.9909805735430157, "grad_norm": 3.5674586296081543, "learning_rate": 2.133443906508492e-09, "loss": 0.8648, "step": 12855 }, { "epoch": 0.9910576626580326, "grad_norm": 3.8357954025268555, "learning_rate": 2.097133169543519e-09, "loss": 0.9061, "step": 12856 }, { "epoch": 0.9911347517730497, "grad_norm": 3.6642403602600098, "learning_rate": 2.0611340259379942e-09, "loss": 0.9031, "step": 12857 }, { "epoch": 0.9912118408880666, "grad_norm": 3.718217134475708, "learning_rate": 2.0254464779356776e-09, "loss": 0.8826, "step": 12858 }, { "epoch": 0.9912889300030836, "grad_norm": 3.5286970138549805, "learning_rate": 1.990070527761456e-09, "loss": 0.8263, "step": 12859 }, { "epoch": 0.9913660191181005, "grad_norm": 3.934288263320923, "learning_rate": 1.9550061776213435e-09, "loss": 0.8804, "step": 12860 }, { "epoch": 0.9914431082331174, "grad_norm": 3.582671880722046, "learning_rate": 1.9202534297008134e-09, "loss": 0.8939, "step": 12861 }, { "epoch": 0.9915201973481345, "grad_norm": 3.763944387435913, "learning_rate": 1.8858122861664664e-09, "loss": 0.9044, "step": 12862 }, { "epoch": 0.9915972864631514, "grad_norm": 4.1658196449279785, "learning_rate": 1.8516827491660282e-09, "loss": 0.8877, "step": 12863 }, { "epoch": 0.9916743755781684, "grad_norm": 3.695138692855835, "learning_rate": 1.817864820827242e-09, "loss": 0.8791, "step": 12864 }, { "epoch": 0.9917514646931853, "grad_norm": 4.022881984710693, "learning_rate": 1.784358503258421e-09, "loss": 1.0657, "step": 12865 }, { "epoch": 0.9918285538082022, "grad_norm": 3.7440459728240967, "learning_rate": 1.7511637985478947e-09, "loss": 0.8766, "step": 12866 }, { "epoch": 0.9919056429232193, "grad_norm": 3.9772725105285645, "learning_rate": 1.718280708766229e-09, "loss": 0.9796, "step": 12867 }, { "epoch": 0.9919827320382362, "grad_norm": 3.865461826324463, "learning_rate": 1.6857092359628957e-09, "loss": 0.8336, "step": 12868 }, { "epoch": 0.9920598211532532, "grad_norm": 4.357512474060059, "learning_rate": 1.6534493821684927e-09, "loss": 1.0112, "step": 12869 }, { "epoch": 0.9921369102682701, "grad_norm": 3.5063059329986572, "learning_rate": 1.621501149394744e-09, "loss": 0.8071, "step": 12870 }, { "epoch": 0.992213999383287, "grad_norm": 3.5108940601348877, "learning_rate": 1.5898645396328349e-09, "loss": 0.9605, "step": 12871 }, { "epoch": 0.992291088498304, "grad_norm": 3.727933883666992, "learning_rate": 1.5585395548556314e-09, "loss": 0.9568, "step": 12872 }, { "epoch": 0.992368177613321, "grad_norm": 3.5517189502716064, "learning_rate": 1.5275261970154608e-09, "loss": 0.9227, "step": 12873 }, { "epoch": 0.992445266728338, "grad_norm": 3.8274848461151123, "learning_rate": 1.4968244680468868e-09, "loss": 1.0268, "step": 12874 }, { "epoch": 0.9925223558433549, "grad_norm": 3.797487258911133, "learning_rate": 1.4664343698628236e-09, "loss": 0.7404, "step": 12875 }, { 
"epoch": 0.9925994449583718, "grad_norm": 3.6216273307800293, "learning_rate": 1.436355904358977e-09, "loss": 0.919, "step": 12876 }, { "epoch": 0.9926765340733888, "grad_norm": 3.420397996902466, "learning_rate": 1.4065890734099586e-09, "loss": 0.8691, "step": 12877 }, { "epoch": 0.9927536231884058, "grad_norm": 3.8331243991851807, "learning_rate": 1.3771338788715061e-09, "loss": 0.945, "step": 12878 }, { "epoch": 0.9928307123034228, "grad_norm": 3.9298641681671143, "learning_rate": 1.3479903225804836e-09, "loss": 0.937, "step": 12879 }, { "epoch": 0.9929078014184397, "grad_norm": 3.827240467071533, "learning_rate": 1.319158406353771e-09, "loss": 0.9731, "step": 12880 }, { "epoch": 0.9929848905334566, "grad_norm": 3.64302921295166, "learning_rate": 1.2906381319882644e-09, "loss": 0.8572, "step": 12881 }, { "epoch": 0.9930619796484736, "grad_norm": 3.7599618434906006, "learning_rate": 1.2624295012625409e-09, "loss": 0.954, "step": 12882 }, { "epoch": 0.9931390687634906, "grad_norm": 3.944673776626587, "learning_rate": 1.2345325159357491e-09, "loss": 0.9175, "step": 12883 }, { "epoch": 0.9932161578785076, "grad_norm": 3.6056995391845703, "learning_rate": 1.2069471777459429e-09, "loss": 0.8907, "step": 12884 }, { "epoch": 0.9932932469935245, "grad_norm": 3.5463924407958984, "learning_rate": 1.1796734884139682e-09, "loss": 0.8499, "step": 12885 }, { "epoch": 0.9933703361085414, "grad_norm": 3.643226385116577, "learning_rate": 1.1527114496395764e-09, "loss": 0.9093, "step": 12886 }, { "epoch": 0.9934474252235584, "grad_norm": 3.62485671043396, "learning_rate": 1.126061063103645e-09, "loss": 0.9344, "step": 12887 }, { "epoch": 0.9935245143385754, "grad_norm": 3.879370927810669, "learning_rate": 1.0997223304687333e-09, "loss": 0.9439, "step": 12888 }, { "epoch": 0.9936016034535924, "grad_norm": 3.560511350631714, "learning_rate": 1.0736952533757506e-09, "loss": 0.8394, "step": 12889 }, { "epoch": 0.9936786925686093, "grad_norm": 3.8150246143341064, "learning_rate": 1.047979833447843e-09, "loss": 0.7913, "step": 12890 }, { "epoch": 0.9937557816836263, "grad_norm": 4.272899150848389, "learning_rate": 1.0225760722876177e-09, "loss": 0.9468, "step": 12891 }, { "epoch": 0.9938328707986432, "grad_norm": 3.9249536991119385, "learning_rate": 9.974839714799178e-10, "loss": 0.8953, "step": 12892 }, { "epoch": 0.9939099599136602, "grad_norm": 3.497441291809082, "learning_rate": 9.727035325884925e-10, "loss": 0.822, "step": 12893 }, { "epoch": 0.9939870490286772, "grad_norm": 4.1843647956848145, "learning_rate": 9.482347571587724e-10, "loss": 0.9528, "step": 12894 }, { "epoch": 0.9940641381436941, "grad_norm": 3.7656443119049072, "learning_rate": 9.240776467150936e-10, "loss": 0.9781, "step": 12895 }, { "epoch": 0.9941412272587111, "grad_norm": 3.6662378311157227, "learning_rate": 9.002322027651389e-10, "loss": 0.889, "step": 12896 }, { "epoch": 0.994218316373728, "grad_norm": 4.327576637268066, "learning_rate": 8.766984267938316e-10, "loss": 0.8852, "step": 12897 }, { "epoch": 0.994295405488745, "grad_norm": 3.467092275619507, "learning_rate": 8.534763202699969e-10, "loss": 0.8667, "step": 12898 }, { "epoch": 0.994372494603762, "grad_norm": 3.563298463821411, "learning_rate": 8.305658846402554e-10, "loss": 0.9111, "step": 12899 }, { "epoch": 0.9944495837187789, "grad_norm": 3.706197738647461, "learning_rate": 8.079671213334639e-10, "loss": 0.854, "step": 12900 }, { "epoch": 0.9945266728337959, "grad_norm": 4.062145233154297, "learning_rate": 7.856800317584956e-10, "loss": 0.8797, "step": 12901 }, { 
"epoch": 0.9946037619488128, "grad_norm": 3.602550506591797, "learning_rate": 7.637046173047946e-10, "loss": 0.8933, "step": 12902 }, { "epoch": 0.9946808510638298, "grad_norm": 4.166884422302246, "learning_rate": 7.420408793423762e-10, "loss": 0.9677, "step": 12903 }, { "epoch": 0.9947579401788468, "grad_norm": 4.013248443603516, "learning_rate": 7.206888192218265e-10, "loss": 0.9302, "step": 12904 }, { "epoch": 0.9948350292938637, "grad_norm": 3.9088120460510254, "learning_rate": 6.99648438274303e-10, "loss": 0.8768, "step": 12905 }, { "epoch": 0.9949121184088807, "grad_norm": 3.6118009090423584, "learning_rate": 6.789197378115342e-10, "loss": 0.9307, "step": 12906 }, { "epoch": 0.9949892075238976, "grad_norm": 3.630589485168457, "learning_rate": 6.585027191263748e-10, "loss": 0.9369, "step": 12907 }, { "epoch": 0.9950662966389145, "grad_norm": 3.9359915256500244, "learning_rate": 6.383973834911406e-10, "loss": 0.8476, "step": 12908 }, { "epoch": 0.9951433857539316, "grad_norm": 3.3206164836883545, "learning_rate": 6.186037321592731e-10, "loss": 0.7947, "step": 12909 }, { "epoch": 0.9952204748689485, "grad_norm": 3.8254833221435547, "learning_rate": 5.991217663653404e-10, "loss": 0.859, "step": 12910 }, { "epoch": 0.9952975639839655, "grad_norm": 3.701080322265625, "learning_rate": 5.799514873233714e-10, "loss": 0.8847, "step": 12911 }, { "epoch": 0.9953746530989824, "grad_norm": 3.581287145614624, "learning_rate": 5.610928962290763e-10, "loss": 0.9241, "step": 12912 }, { "epoch": 0.9954517422139993, "grad_norm": 3.830244302749634, "learning_rate": 5.425459942576261e-10, "loss": 0.9187, "step": 12913 }, { "epoch": 0.9955288313290164, "grad_norm": 3.582486867904663, "learning_rate": 5.243107825653182e-10, "loss": 0.8323, "step": 12914 }, { "epoch": 0.9956059204440333, "grad_norm": 3.7536768913269043, "learning_rate": 5.06387262289576e-10, "loss": 0.9389, "step": 12915 }, { "epoch": 0.9956830095590503, "grad_norm": 3.8179402351379395, "learning_rate": 4.887754345478391e-10, "loss": 0.8959, "step": 12916 }, { "epoch": 0.9957600986740672, "grad_norm": 3.915501356124878, "learning_rate": 4.714753004375627e-10, "loss": 0.9829, "step": 12917 }, { "epoch": 0.9958371877890841, "grad_norm": 3.4252476692199707, "learning_rate": 4.5448686103732876e-10, "loss": 0.816, "step": 12918 }, { "epoch": 0.9959142769041012, "grad_norm": 4.076374053955078, "learning_rate": 4.37810117406845e-10, "loss": 0.9458, "step": 12919 }, { "epoch": 0.9959913660191181, "grad_norm": 3.622620105743408, "learning_rate": 4.2144507058528016e-10, "loss": 0.8332, "step": 12920 }, { "epoch": 0.9960684551341351, "grad_norm": 3.3440213203430176, "learning_rate": 4.053917215934844e-10, "loss": 0.8837, "step": 12921 }, { "epoch": 0.996145544249152, "grad_norm": 3.92189884185791, "learning_rate": 3.8965007143176857e-10, "loss": 0.9619, "step": 12922 }, { "epoch": 0.9962226333641689, "grad_norm": 4.633378505706787, "learning_rate": 3.742201210815699e-10, "loss": 0.9774, "step": 12923 }, { "epoch": 0.996299722479186, "grad_norm": 3.8096113204956055, "learning_rate": 3.5910187150545174e-10, "loss": 0.9167, "step": 12924 }, { "epoch": 0.9963768115942029, "grad_norm": 3.8679616451263428, "learning_rate": 3.4429532364488315e-10, "loss": 0.844, "step": 12925 }, { "epoch": 0.9964539007092199, "grad_norm": 4.342710494995117, "learning_rate": 3.298004784241249e-10, "loss": 0.9341, "step": 12926 }, { "epoch": 0.9965309898242368, "grad_norm": 4.704819679260254, "learning_rate": 3.156173367457882e-10, "loss": 0.9207, "step": 12927 }, { "epoch": 
0.9966080789392537, "grad_norm": 3.6841025352478027, "learning_rate": 3.01745899495276e-10, "loss": 0.8167, "step": 12928 }, { "epoch": 0.9966851680542708, "grad_norm": 3.9212276935577393, "learning_rate": 2.8818616753634177e-10, "loss": 0.9434, "step": 12929 }, { "epoch": 0.9967622571692877, "grad_norm": 3.5783891677856445, "learning_rate": 2.749381417155306e-10, "loss": 0.8192, "step": 12930 }, { "epoch": 0.9968393462843047, "grad_norm": 3.933195114135742, "learning_rate": 2.6200182285718303e-10, "loss": 0.8573, "step": 12931 }, { "epoch": 0.9969164353993216, "grad_norm": 4.147609710693359, "learning_rate": 2.493772117695414e-10, "loss": 0.9402, "step": 12932 }, { "epoch": 0.9969935245143385, "grad_norm": 3.782233476638794, "learning_rate": 2.3706430923808867e-10, "loss": 0.969, "step": 12933 }, { "epoch": 0.9970706136293556, "grad_norm": 3.95326566696167, "learning_rate": 2.250631160316541e-10, "loss": 1.0036, "step": 12934 }, { "epoch": 0.9971477027443725, "grad_norm": 3.8084826469421387, "learning_rate": 2.1337363289797296e-10, "loss": 0.9255, "step": 12935 }, { "epoch": 0.9972247918593895, "grad_norm": 4.2080535888671875, "learning_rate": 2.0199586056590669e-10, "loss": 0.9461, "step": 12936 }, { "epoch": 0.9973018809744064, "grad_norm": 4.278135299682617, "learning_rate": 1.909297997448878e-10, "loss": 0.9677, "step": 12937 }, { "epoch": 0.9973789700894233, "grad_norm": 3.921271800994873, "learning_rate": 1.8017545112491984e-10, "loss": 0.9193, "step": 12938 }, { "epoch": 0.9974560592044404, "grad_norm": 3.6912286281585693, "learning_rate": 1.697328153760225e-10, "loss": 0.9062, "step": 12939 }, { "epoch": 0.9975331483194573, "grad_norm": 3.613358736038208, "learning_rate": 1.5960189314934149e-10, "loss": 0.8264, "step": 12940 }, { "epoch": 0.9976102374344743, "grad_norm": 4.033813953399658, "learning_rate": 1.4978268507659376e-10, "loss": 0.9703, "step": 12941 }, { "epoch": 0.9976873265494912, "grad_norm": 3.622581958770752, "learning_rate": 1.402751917700673e-10, "loss": 0.8894, "step": 12942 }, { "epoch": 0.9977644156645081, "grad_norm": 3.6066484451293945, "learning_rate": 1.3107941382262124e-10, "loss": 0.8503, "step": 12943 }, { "epoch": 0.9978415047795252, "grad_norm": 3.685361385345459, "learning_rate": 1.221953518071306e-10, "loss": 0.8426, "step": 12944 }, { "epoch": 0.9979185938945421, "grad_norm": 4.148501873016357, "learning_rate": 1.1362300627815182e-10, "loss": 0.9235, "step": 12945 }, { "epoch": 0.9979956830095591, "grad_norm": 3.7229340076446533, "learning_rate": 1.0536237776970215e-10, "loss": 0.8819, "step": 12946 }, { "epoch": 0.998072772124576, "grad_norm": 3.745091199874878, "learning_rate": 9.741346679636998e-11, "loss": 0.8432, "step": 12947 }, { "epoch": 0.9981498612395929, "grad_norm": 3.642552137374878, "learning_rate": 8.9776273854425e-11, "loss": 0.7828, "step": 12948 }, { "epoch": 0.99822695035461, "grad_norm": 4.068527698516846, "learning_rate": 8.245079941959777e-11, "loss": 0.8816, "step": 12949 }, { "epoch": 0.9983040394696269, "grad_norm": 3.732131242752075, "learning_rate": 7.543704394874507e-11, "loss": 0.8963, "step": 12950 }, { "epoch": 0.9983811285846439, "grad_norm": 4.001811504364014, "learning_rate": 6.873500787873965e-11, "loss": 0.9997, "step": 12951 }, { "epoch": 0.9984582176996608, "grad_norm": 3.770373821258545, "learning_rate": 6.234469162813561e-11, "loss": 0.8653, "step": 12952 }, { "epoch": 0.9985353068146777, "grad_norm": 3.548311710357666, "learning_rate": 5.6266095595503e-11, "loss": 0.8459, "step": 12953 }, { "epoch": 
0.9986123959296948, "grad_norm": 3.578949451446533, "learning_rate": 5.049922015887276e-11, "loss": 0.8083, "step": 12954 }, { "epoch": 0.9986894850447117, "grad_norm": 3.936638593673706, "learning_rate": 4.504406567795716e-11, "loss": 0.9448, "step": 12955 }, { "epoch": 0.9987665741597287, "grad_norm": 3.4746735095977783, "learning_rate": 3.990063249359466e-11, "loss": 0.8742, "step": 12956 }, { "epoch": 0.9988436632747456, "grad_norm": 3.4810571670532227, "learning_rate": 3.506892092552949e-11, "loss": 0.8603, "step": 12957 }, { "epoch": 0.9989207523897625, "grad_norm": 3.837407112121582, "learning_rate": 3.054893127574232e-11, "loss": 1.0381, "step": 12958 }, { "epoch": 0.9989978415047795, "grad_norm": 3.7372279167175293, "learning_rate": 2.6340663825674683e-11, "loss": 0.8691, "step": 12959 }, { "epoch": 0.9990749306197965, "grad_norm": 3.8073391914367676, "learning_rate": 2.2444118837339212e-11, "loss": 0.945, "step": 12960 }, { "epoch": 0.9991520197348135, "grad_norm": 3.483799457550049, "learning_rate": 1.8859296554429862e-11, "loss": 0.8425, "step": 12961 }, { "epoch": 0.9992291088498304, "grad_norm": 4.023273468017578, "learning_rate": 1.5586197200101462e-11, "loss": 0.905, "step": 12962 }, { "epoch": 0.9993061979648473, "grad_norm": 3.542368173599243, "learning_rate": 1.2624820978079932e-11, "loss": 0.922, "step": 12963 }, { "epoch": 0.9993832870798643, "grad_norm": 3.746952772140503, "learning_rate": 9.975168073772523e-12, "loss": 0.7879, "step": 12964 }, { "epoch": 0.9994603761948813, "grad_norm": 3.6323859691619873, "learning_rate": 7.63723865149224e-12, "loss": 0.9078, "step": 12965 }, { "epoch": 0.9995374653098983, "grad_norm": 3.747823476791382, "learning_rate": 5.611032857788523e-12, "loss": 0.8839, "step": 12966 }, { "epoch": 0.9996145544249152, "grad_norm": 3.6762750148773193, "learning_rate": 3.896550818116574e-12, "loss": 0.86, "step": 12967 }, { "epoch": 0.9996916435399321, "grad_norm": 3.4373939037323, "learning_rate": 2.4937926401680245e-12, "loss": 0.904, "step": 12968 }, { "epoch": 0.9997687326549491, "grad_norm": 3.7813000679016113, "learning_rate": 1.402758411095384e-12, "loss": 0.9159, "step": 12969 }, { "epoch": 0.9998458217699661, "grad_norm": 3.9511356353759766, "learning_rate": 6.234481991773678e-13, "loss": 0.864, "step": 12970 }, { "epoch": 0.9999229108849831, "grad_norm": 4.2751851081848145, "learning_rate": 1.5586205215356586e-13, "loss": 0.8958, "step": 12971 }, { "epoch": 1.0, "grad_norm": 4.090433120727539, "learning_rate": 0.0, "loss": 0.8966, "step": 12972 }, { "epoch": 1.0, "step": 12972, "total_flos": 2.0878280805119427e+18, "train_loss": 0.3483946987583135, "train_runtime": 106063.3145, "train_samples_per_second": 3.914, "train_steps_per_second": 0.122 } ], "logging_steps": 1.0, "max_steps": 12972, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.0878280805119427e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }
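The record above is consistent with a Hugging Face Trainer `trainer_state.json`: per-step entries under "log_history" (epoch, grad_norm, learning_rate, loss, step) followed by a run summary. Below is a minimal sketch for inspecting it, assuming the file is saved verbatim as "trainer_state.json" (the filename is an assumption) and that Python 3 with matplotlib is available; it is an illustrative reader, not part of the training run itself.

# Sketch: load the trainer-state JSON and plot loss / learning rate vs. step.
# Assumes the log above was saved as "trainer_state.json" in the working directory.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step entries live in "log_history"; the final entry is the run summary
# (it carries "train_runtime" instead of "loss"), so keep only entries with a loss.
entries = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in entries]
losses = [e["loss"] for e in entries]
lrs = [e["learning_rate"] for e in entries]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, losses, linewidth=0.5)
ax_loss.set_ylabel("loss")
ax_lr.plot(steps, lrs, linewidth=0.5)
ax_lr.set_ylabel("learning_rate")
ax_lr.set_xlabel("step")
fig.suptitle(f"{state['global_step']} steps, {state['num_train_epochs']} epoch(s)")
plt.tight_layout()
plt.show()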