| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.0, | |
| "eval_steps": 500, | |
| "global_step": 2442, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.012285012285012284, | |
| "grad_norm": 20.393420102976396, | |
| "learning_rate": 6.530612244897961e-07, | |
| "loss": 1.0262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 1.0177384614944458, | |
| "step": 5, | |
| "valid_targets_mean": 1250.2, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 0.02457002457002457, | |
| "grad_norm": 16.618964498117016, | |
| "learning_rate": 1.469387755102041e-06, | |
| "loss": 0.9879, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.9600104689598083, | |
| "step": 10, | |
| "valid_targets_mean": 1490.1, | |
| "valid_targets_min": 971 | |
| }, | |
| { | |
| "epoch": 0.036855036855036855, | |
| "grad_norm": 16.76493997642498, | |
| "learning_rate": 2.285714285714286e-06, | |
| "loss": 0.9733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.9610337018966675, | |
| "step": 15, | |
| "valid_targets_mean": 1209.8, | |
| "valid_targets_min": 739 | |
| }, | |
| { | |
| "epoch": 0.04914004914004914, | |
| "grad_norm": 10.325809762015542, | |
| "learning_rate": 3.1020408163265307e-06, | |
| "loss": 0.8637, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.8307616710662842, | |
| "step": 20, | |
| "valid_targets_mean": 1436.6, | |
| "valid_targets_min": 853 | |
| }, | |
| { | |
| "epoch": 0.06142506142506143, | |
| "grad_norm": 6.763290592061581, | |
| "learning_rate": 3.9183673469387755e-06, | |
| "loss": 0.7754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.7274804711341858, | |
| "step": 25, | |
| "valid_targets_mean": 1216.4, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 0.07371007371007371, | |
| "grad_norm": 4.61391453463459, | |
| "learning_rate": 4.734693877551021e-06, | |
| "loss": 0.6768, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6561411619186401, | |
| "step": 30, | |
| "valid_targets_mean": 1385.0, | |
| "valid_targets_min": 568 | |
| }, | |
| { | |
| "epoch": 0.085995085995086, | |
| "grad_norm": 2.746094169189624, | |
| "learning_rate": 5.551020408163266e-06, | |
| "loss": 0.6116, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5776642560958862, | |
| "step": 35, | |
| "valid_targets_mean": 1544.8, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 0.09828009828009827, | |
| "grad_norm": 1.9498600894615645, | |
| "learning_rate": 6.36734693877551e-06, | |
| "loss": 0.542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5209287405014038, | |
| "step": 40, | |
| "valid_targets_mean": 1367.7, | |
| "valid_targets_min": 574 | |
| }, | |
| { | |
| "epoch": 0.11056511056511056, | |
| "grad_norm": 1.6426899897426925, | |
| "learning_rate": 7.183673469387755e-06, | |
| "loss": 0.4923, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4782862663269043, | |
| "step": 45, | |
| "valid_targets_mean": 1329.5, | |
| "valid_targets_min": 531 | |
| }, | |
| { | |
| "epoch": 0.12285012285012285, | |
| "grad_norm": 1.5930382187301593, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.4667, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45163053274154663, | |
| "step": 50, | |
| "valid_targets_mean": 1316.3, | |
| "valid_targets_min": 783 | |
| }, | |
| { | |
| "epoch": 0.13513513513513514, | |
| "grad_norm": 1.3625584659635621, | |
| "learning_rate": 8.816326530612247e-06, | |
| "loss": 0.4221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4141678214073181, | |
| "step": 55, | |
| "valid_targets_mean": 1350.4, | |
| "valid_targets_min": 794 | |
| }, | |
| { | |
| "epoch": 0.14742014742014742, | |
| "grad_norm": 1.5476732458974913, | |
| "learning_rate": 9.63265306122449e-06, | |
| "loss": 0.406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3802851140499115, | |
| "step": 60, | |
| "valid_targets_mean": 1348.1, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 0.1597051597051597, | |
| "grad_norm": 1.2981161511327286, | |
| "learning_rate": 1.0448979591836737e-05, | |
| "loss": 0.3753, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3662075996398926, | |
| "step": 65, | |
| "valid_targets_mean": 1233.3, | |
| "valid_targets_min": 624 | |
| }, | |
| { | |
| "epoch": 0.171990171990172, | |
| "grad_norm": 1.1801878613153098, | |
| "learning_rate": 1.126530612244898e-05, | |
| "loss": 0.3723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37776321172714233, | |
| "step": 70, | |
| "valid_targets_mean": 1599.6, | |
| "valid_targets_min": 768 | |
| }, | |
| { | |
| "epoch": 0.18427518427518427, | |
| "grad_norm": 1.3597452765229934, | |
| "learning_rate": 1.2081632653061225e-05, | |
| "loss": 0.3497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3400910198688507, | |
| "step": 75, | |
| "valid_targets_mean": 1175.1, | |
| "valid_targets_min": 661 | |
| }, | |
| { | |
| "epoch": 0.19656019656019655, | |
| "grad_norm": 1.1333692250183753, | |
| "learning_rate": 1.2897959183673469e-05, | |
| "loss": 0.3274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33627358078956604, | |
| "step": 80, | |
| "valid_targets_mean": 1151.1, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 0.20884520884520885, | |
| "grad_norm": 1.2552043931159993, | |
| "learning_rate": 1.3714285714285716e-05, | |
| "loss": 0.3201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31260746717453003, | |
| "step": 85, | |
| "valid_targets_mean": 1273.6, | |
| "valid_targets_min": 590 | |
| }, | |
| { | |
| "epoch": 0.22113022113022113, | |
| "grad_norm": 1.0701038993714678, | |
| "learning_rate": 1.4530612244897961e-05, | |
| "loss": 0.3047, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3113088309764862, | |
| "step": 90, | |
| "valid_targets_mean": 1315.8, | |
| "valid_targets_min": 649 | |
| }, | |
| { | |
| "epoch": 0.2334152334152334, | |
| "grad_norm": 1.0767332367371567, | |
| "learning_rate": 1.5346938775510204e-05, | |
| "loss": 0.3141, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3126605749130249, | |
| "step": 95, | |
| "valid_targets_mean": 1209.7, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 0.2457002457002457, | |
| "grad_norm": 1.2226767141577841, | |
| "learning_rate": 1.616326530612245e-05, | |
| "loss": 0.3136, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2967890501022339, | |
| "step": 100, | |
| "valid_targets_mean": 1322.4, | |
| "valid_targets_min": 940 | |
| }, | |
| { | |
| "epoch": 0.257985257985258, | |
| "grad_norm": 1.1217031077941872, | |
| "learning_rate": 1.6979591836734695e-05, | |
| "loss": 0.3095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3070026934146881, | |
| "step": 105, | |
| "valid_targets_mean": 1323.4, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "grad_norm": 1.1274976542736919, | |
| "learning_rate": 1.779591836734694e-05, | |
| "loss": 0.2961, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3174216151237488, | |
| "step": 110, | |
| "valid_targets_mean": 1408.8, | |
| "valid_targets_min": 897 | |
| }, | |
| { | |
| "epoch": 0.28255528255528256, | |
| "grad_norm": 1.0850909284018426, | |
| "learning_rate": 1.8612244897959185e-05, | |
| "loss": 0.2861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26834872364997864, | |
| "step": 115, | |
| "valid_targets_mean": 1392.1, | |
| "valid_targets_min": 813 | |
| }, | |
| { | |
| "epoch": 0.29484029484029484, | |
| "grad_norm": 1.063821859045589, | |
| "learning_rate": 1.942857142857143e-05, | |
| "loss": 0.2882, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2948819100856781, | |
| "step": 120, | |
| "valid_targets_mean": 1364.4, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 0.3071253071253071, | |
| "grad_norm": 0.9837731989100763, | |
| "learning_rate": 2.0244897959183672e-05, | |
| "loss": 0.2891, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2742517590522766, | |
| "step": 125, | |
| "valid_targets_mean": 1465.1, | |
| "valid_targets_min": 941 | |
| }, | |
| { | |
| "epoch": 0.3194103194103194, | |
| "grad_norm": 1.2088033631299338, | |
| "learning_rate": 2.106122448979592e-05, | |
| "loss": 0.2916, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.298936665058136, | |
| "step": 130, | |
| "valid_targets_mean": 1072.5, | |
| "valid_targets_min": 663 | |
| }, | |
| { | |
| "epoch": 0.3316953316953317, | |
| "grad_norm": 1.1123542063985024, | |
| "learning_rate": 2.1877551020408166e-05, | |
| "loss": 0.2846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3043467104434967, | |
| "step": 135, | |
| "valid_targets_mean": 1425.1, | |
| "valid_targets_min": 727 | |
| }, | |
| { | |
| "epoch": 0.343980343980344, | |
| "grad_norm": 1.02834209312945, | |
| "learning_rate": 2.269387755102041e-05, | |
| "loss": 0.297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31079795956611633, | |
| "step": 140, | |
| "valid_targets_mean": 1287.7, | |
| "valid_targets_min": 637 | |
| }, | |
| { | |
| "epoch": 0.35626535626535627, | |
| "grad_norm": 1.0098376349444529, | |
| "learning_rate": 2.3510204081632656e-05, | |
| "loss": 0.2748, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2863055467605591, | |
| "step": 145, | |
| "valid_targets_mean": 1450.8, | |
| "valid_targets_min": 724 | |
| }, | |
| { | |
| "epoch": 0.36855036855036855, | |
| "grad_norm": 1.0842107045063027, | |
| "learning_rate": 2.4326530612244898e-05, | |
| "loss": 0.287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2861521542072296, | |
| "step": 150, | |
| "valid_targets_mean": 1355.8, | |
| "valid_targets_min": 646 | |
| }, | |
| { | |
| "epoch": 0.3808353808353808, | |
| "grad_norm": 1.0851670752985323, | |
| "learning_rate": 2.5142857142857143e-05, | |
| "loss": 0.273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.265603244304657, | |
| "step": 155, | |
| "valid_targets_mean": 1190.7, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 0.3931203931203931, | |
| "grad_norm": 1.1465258501983437, | |
| "learning_rate": 2.5959183673469392e-05, | |
| "loss": 0.2829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2712598443031311, | |
| "step": 160, | |
| "valid_targets_mean": 1198.3, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 0.40540540540540543, | |
| "grad_norm": 1.168180594059772, | |
| "learning_rate": 2.6775510204081637e-05, | |
| "loss": 0.2729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26828718185424805, | |
| "step": 165, | |
| "valid_targets_mean": 1239.4, | |
| "valid_targets_min": 713 | |
| }, | |
| { | |
| "epoch": 0.4176904176904177, | |
| "grad_norm": 3.179301974952432, | |
| "learning_rate": 2.7591836734693882e-05, | |
| "loss": 0.2746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2635612487792969, | |
| "step": 170, | |
| "valid_targets_mean": 1440.5, | |
| "valid_targets_min": 598 | |
| }, | |
| { | |
| "epoch": 0.42997542997543, | |
| "grad_norm": 1.1402043628747394, | |
| "learning_rate": 2.8408163265306124e-05, | |
| "loss": 0.2769, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26931822299957275, | |
| "step": 175, | |
| "valid_targets_mean": 1560.9, | |
| "valid_targets_min": 872 | |
| }, | |
| { | |
| "epoch": 0.44226044226044225, | |
| "grad_norm": 1.129369174656995, | |
| "learning_rate": 2.922448979591837e-05, | |
| "loss": 0.2782, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27974170446395874, | |
| "step": 180, | |
| "valid_targets_mean": 1178.6, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 1.04602917753856, | |
| "learning_rate": 3.0040816326530614e-05, | |
| "loss": 0.2686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2702872157096863, | |
| "step": 185, | |
| "valid_targets_mean": 1167.0, | |
| "valid_targets_min": 516 | |
| }, | |
| { | |
| "epoch": 0.4668304668304668, | |
| "grad_norm": 1.1298127808186424, | |
| "learning_rate": 3.085714285714286e-05, | |
| "loss": 0.2636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2870205044746399, | |
| "step": 190, | |
| "valid_targets_mean": 1329.9, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 0.47911547911547914, | |
| "grad_norm": 1.1118369894692701, | |
| "learning_rate": 3.1673469387755105e-05, | |
| "loss": 0.2591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24347294867038727, | |
| "step": 195, | |
| "valid_targets_mean": 1164.9, | |
| "valid_targets_min": 756 | |
| }, | |
| { | |
| "epoch": 0.4914004914004914, | |
| "grad_norm": 1.1474130320495435, | |
| "learning_rate": 3.2489795918367346e-05, | |
| "loss": 0.2523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2255091518163681, | |
| "step": 200, | |
| "valid_targets_mean": 1299.4, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 0.5036855036855037, | |
| "grad_norm": 1.069005428418841, | |
| "learning_rate": 3.3306122448979595e-05, | |
| "loss": 0.266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2635962963104248, | |
| "step": 205, | |
| "valid_targets_mean": 1260.2, | |
| "valid_targets_min": 874 | |
| }, | |
| { | |
| "epoch": 0.515970515970516, | |
| "grad_norm": 1.1027237910608108, | |
| "learning_rate": 3.4122448979591843e-05, | |
| "loss": 0.261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2622719705104828, | |
| "step": 210, | |
| "valid_targets_mean": 1420.3, | |
| "valid_targets_min": 861 | |
| }, | |
| { | |
| "epoch": 0.5282555282555282, | |
| "grad_norm": 1.0429197104233103, | |
| "learning_rate": 3.4938775510204085e-05, | |
| "loss": 0.2498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24091707170009613, | |
| "step": 215, | |
| "valid_targets_mean": 1357.9, | |
| "valid_targets_min": 1000 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 0.9930718356587129, | |
| "learning_rate": 3.575510204081633e-05, | |
| "loss": 0.2574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25741562247276306, | |
| "step": 220, | |
| "valid_targets_mean": 1370.2, | |
| "valid_targets_min": 783 | |
| }, | |
| { | |
| "epoch": 0.5528255528255528, | |
| "grad_norm": 1.0094009512682047, | |
| "learning_rate": 3.6571428571428576e-05, | |
| "loss": 0.2548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24548643827438354, | |
| "step": 225, | |
| "valid_targets_mean": 1205.2, | |
| "valid_targets_min": 696 | |
| }, | |
| { | |
| "epoch": 0.5651105651105651, | |
| "grad_norm": 1.0739306060530445, | |
| "learning_rate": 3.738775510204082e-05, | |
| "loss": 0.2577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2718384265899658, | |
| "step": 230, | |
| "valid_targets_mean": 1312.5, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 0.5773955773955773, | |
| "grad_norm": 0.9557765341629152, | |
| "learning_rate": 3.8204081632653066e-05, | |
| "loss": 0.2508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24578313529491425, | |
| "step": 235, | |
| "valid_targets_mean": 1375.5, | |
| "valid_targets_min": 725 | |
| }, | |
| { | |
| "epoch": 0.5896805896805897, | |
| "grad_norm": 0.931971921575949, | |
| "learning_rate": 3.902040816326531e-05, | |
| "loss": 0.261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23134949803352356, | |
| "step": 240, | |
| "valid_targets_mean": 1403.4, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 0.601965601965602, | |
| "grad_norm": 1.07899304112893, | |
| "learning_rate": 3.983673469387755e-05, | |
| "loss": 0.262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.262241393327713, | |
| "step": 245, | |
| "valid_targets_mean": 1307.3, | |
| "valid_targets_min": 679 | |
| }, | |
| { | |
| "epoch": 0.6142506142506142, | |
| "grad_norm": 1.167164774236814, | |
| "learning_rate": 3.9999672841332876e-05, | |
| "loss": 0.2545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2369876652956009, | |
| "step": 250, | |
| "valid_targets_mean": 1384.2, | |
| "valid_targets_min": 643 | |
| }, | |
| { | |
| "epoch": 0.6265356265356266, | |
| "grad_norm": 1.0945327185737628, | |
| "learning_rate": 3.999834377759164e-05, | |
| "loss": 0.2605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25854480266571045, | |
| "step": 255, | |
| "valid_targets_mean": 1287.4, | |
| "valid_targets_min": 582 | |
| }, | |
| { | |
| "epoch": 0.6388206388206388, | |
| "grad_norm": 1.041040143552112, | |
| "learning_rate": 3.999599242924703e-05, | |
| "loss": 0.2575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2597736120223999, | |
| "step": 260, | |
| "valid_targets_mean": 1212.7, | |
| "valid_targets_min": 577 | |
| }, | |
| { | |
| "epoch": 0.6511056511056511, | |
| "grad_norm": 0.9073322349842393, | |
| "learning_rate": 3.999261891649637e-05, | |
| "loss": 0.2571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23483791947364807, | |
| "step": 265, | |
| "valid_targets_mean": 1461.9, | |
| "valid_targets_min": 979 | |
| }, | |
| { | |
| "epoch": 0.6633906633906634, | |
| "grad_norm": 0.9351495905223027, | |
| "learning_rate": 3.9988223411788436e-05, | |
| "loss": 0.2614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26156753301620483, | |
| "step": 270, | |
| "valid_targets_mean": 1577.8, | |
| "valid_targets_min": 753 | |
| }, | |
| { | |
| "epoch": 0.6756756756756757, | |
| "grad_norm": 1.2088525258676897, | |
| "learning_rate": 3.998280613981468e-05, | |
| "loss": 0.2477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24581116437911987, | |
| "step": 275, | |
| "valid_targets_mean": 1449.1, | |
| "valid_targets_min": 819 | |
| }, | |
| { | |
| "epoch": 0.687960687960688, | |
| "grad_norm": 1.1006566748406348, | |
| "learning_rate": 3.9976367377497725e-05, | |
| "loss": 0.252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24314700067043304, | |
| "step": 280, | |
| "valid_targets_mean": 1120.2, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 0.7002457002457002, | |
| "grad_norm": 1.042908345863946, | |
| "learning_rate": 3.99689074539772e-05, | |
| "loss": 0.2502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24501767754554749, | |
| "step": 285, | |
| "valid_targets_mean": 1168.4, | |
| "valid_targets_min": 599 | |
| }, | |
| { | |
| "epoch": 0.7125307125307125, | |
| "grad_norm": 1.0140467367686494, | |
| "learning_rate": 3.9960426750592936e-05, | |
| "loss": 0.245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2532154619693756, | |
| "step": 290, | |
| "valid_targets_mean": 1205.3, | |
| "valid_targets_min": 833 | |
| }, | |
| { | |
| "epoch": 0.7248157248157249, | |
| "grad_norm": 0.9072613471071872, | |
| "learning_rate": 3.995092570086546e-05, | |
| "loss": 0.2508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23869293928146362, | |
| "step": 295, | |
| "valid_targets_mean": 1309.0, | |
| "valid_targets_min": 681 | |
| }, | |
| { | |
| "epoch": 0.7371007371007371, | |
| "grad_norm": 0.8786025671834236, | |
| "learning_rate": 3.9940404790473825e-05, | |
| "loss": 0.2469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23132875561714172, | |
| "step": 300, | |
| "valid_targets_mean": 1249.8, | |
| "valid_targets_min": 656 | |
| }, | |
| { | |
| "epoch": 0.7493857493857494, | |
| "grad_norm": 0.9874153491297156, | |
| "learning_rate": 3.992886455723082e-05, | |
| "loss": 0.25, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.259071946144104, | |
| "step": 305, | |
| "valid_targets_mean": 1174.4, | |
| "valid_targets_min": 546 | |
| }, | |
| { | |
| "epoch": 0.7616707616707616, | |
| "grad_norm": 0.8822062580867077, | |
| "learning_rate": 3.991630559105541e-05, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22890859842300415, | |
| "step": 310, | |
| "valid_targets_mean": 1366.1, | |
| "valid_targets_min": 828 | |
| }, | |
| { | |
| "epoch": 0.773955773955774, | |
| "grad_norm": 1.1997048060495525, | |
| "learning_rate": 3.990272853394268e-05, | |
| "loss": 0.2446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2312544882297516, | |
| "step": 315, | |
| "valid_targets_mean": 1168.6, | |
| "valid_targets_min": 692 | |
| }, | |
| { | |
| "epoch": 0.7862407862407862, | |
| "grad_norm": 0.8851873577261069, | |
| "learning_rate": 3.988813407993089e-05, | |
| "loss": 0.2258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23458437621593475, | |
| "step": 320, | |
| "valid_targets_mean": 1399.3, | |
| "valid_targets_min": 848 | |
| }, | |
| { | |
| "epoch": 0.7985257985257985, | |
| "grad_norm": 0.8799859792612384, | |
| "learning_rate": 3.987252297506613e-05, | |
| "loss": 0.2406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24704906344413757, | |
| "step": 325, | |
| "valid_targets_mean": 1277.9, | |
| "valid_targets_min": 659 | |
| }, | |
| { | |
| "epoch": 0.8108108108108109, | |
| "grad_norm": 0.9793235157162589, | |
| "learning_rate": 3.9855896017364075e-05, | |
| "loss": 0.2366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24380642175674438, | |
| "step": 330, | |
| "valid_targets_mean": 1268.5, | |
| "valid_targets_min": 559 | |
| }, | |
| { | |
| "epoch": 0.8230958230958231, | |
| "grad_norm": 0.8985936094157285, | |
| "learning_rate": 3.983825405676927e-05, | |
| "loss": 0.2432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2342374473810196, | |
| "step": 335, | |
| "valid_targets_mean": 1384.6, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 0.8353808353808354, | |
| "grad_norm": 0.8558386006809837, | |
| "learning_rate": 3.981959799511161e-05, | |
| "loss": 0.2417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2336934357881546, | |
| "step": 340, | |
| "valid_targets_mean": 1429.6, | |
| "valid_targets_min": 613 | |
| }, | |
| { | |
| "epoch": 0.8476658476658476, | |
| "grad_norm": 1.0127066085428036, | |
| "learning_rate": 3.979992878606032e-05, | |
| "loss": 0.2481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22973856329917908, | |
| "step": 345, | |
| "valid_targets_mean": 1162.9, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 0.85995085995086, | |
| "grad_norm": 0.9613524037318636, | |
| "learning_rate": 3.977924743507513e-05, | |
| "loss": 0.2474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23490938544273376, | |
| "step": 350, | |
| "valid_targets_mean": 1256.2, | |
| "valid_targets_min": 592 | |
| }, | |
| { | |
| "epoch": 0.8722358722358723, | |
| "grad_norm": 1.1706265244866718, | |
| "learning_rate": 3.975755499935492e-05, | |
| "loss": 0.2558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2792086601257324, | |
| "step": 355, | |
| "valid_targets_mean": 1220.6, | |
| "valid_targets_min": 597 | |
| }, | |
| { | |
| "epoch": 0.8845208845208845, | |
| "grad_norm": 0.8057713207877663, | |
| "learning_rate": 3.973485258778368e-05, | |
| "loss": 0.2318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23073288798332214, | |
| "step": 360, | |
| "valid_targets_mean": 1582.8, | |
| "valid_targets_min": 696 | |
| }, | |
| { | |
| "epoch": 0.8968058968058968, | |
| "grad_norm": 0.9194633390375606, | |
| "learning_rate": 3.971114136087379e-05, | |
| "loss": 0.2407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2423998862504959, | |
| "step": 365, | |
| "valid_targets_mean": 1402.8, | |
| "valid_targets_min": 834 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 0.8944115198348385, | |
| "learning_rate": 3.968642253070675e-05, | |
| "loss": 0.2347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2302512228488922, | |
| "step": 370, | |
| "valid_targets_mean": 1309.1, | |
| "valid_targets_min": 601 | |
| }, | |
| { | |
| "epoch": 0.9213759213759214, | |
| "grad_norm": 1.218623065182673, | |
| "learning_rate": 3.966069736087116e-05, | |
| "loss": 0.2481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24052146077156067, | |
| "step": 375, | |
| "valid_targets_mean": 1465.2, | |
| "valid_targets_min": 569 | |
| }, | |
| { | |
| "epoch": 0.9336609336609336, | |
| "grad_norm": 0.9655576671123012, | |
| "learning_rate": 3.963396716639818e-05, | |
| "loss": 0.2365, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23131707310676575, | |
| "step": 380, | |
| "valid_targets_mean": 1381.5, | |
| "valid_targets_min": 855 | |
| }, | |
| { | |
| "epoch": 0.9459459459459459, | |
| "grad_norm": 1.017295605724478, | |
| "learning_rate": 3.960623331369427e-05, | |
| "loss": 0.2355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21069012582302094, | |
| "step": 385, | |
| "valid_targets_mean": 1067.1, | |
| "valid_targets_min": 833 | |
| }, | |
| { | |
| "epoch": 0.9582309582309583, | |
| "grad_norm": 0.8261436275308143, | |
| "learning_rate": 3.957749722047138e-05, | |
| "loss": 0.2303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2373935580253601, | |
| "step": 390, | |
| "valid_targets_mean": 1291.5, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 0.9705159705159705, | |
| "grad_norm": 0.8273087849494487, | |
| "learning_rate": 3.9547760355674405e-05, | |
| "loss": 0.2222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22270742058753967, | |
| "step": 395, | |
| "valid_targets_mean": 1429.6, | |
| "valid_targets_min": 860 | |
| }, | |
| { | |
| "epoch": 0.9828009828009828, | |
| "grad_norm": 1.0770825093042904, | |
| "learning_rate": 3.951702423940621e-05, | |
| "loss": 0.2352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24236102402210236, | |
| "step": 400, | |
| "valid_targets_mean": 1270.6, | |
| "valid_targets_min": 668 | |
| }, | |
| { | |
| "epoch": 0.995085995085995, | |
| "grad_norm": 0.8879277341294027, | |
| "learning_rate": 3.948529044284981e-05, | |
| "loss": 0.2302, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25073006749153137, | |
| "step": 405, | |
| "valid_targets_mean": 1449.1, | |
| "valid_targets_min": 605 | |
| }, | |
| { | |
| "epoch": 1.0073710073710074, | |
| "grad_norm": 0.8947128546624157, | |
| "learning_rate": 3.9452560588188135e-05, | |
| "loss": 0.2437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23842811584472656, | |
| "step": 410, | |
| "valid_targets_mean": 1442.7, | |
| "valid_targets_min": 625 | |
| }, | |
| { | |
| "epoch": 1.0196560196560196, | |
| "grad_norm": 0.8106281245582916, | |
| "learning_rate": 3.9418836348521045e-05, | |
| "loss": 0.2309, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22238868474960327, | |
| "step": 415, | |
| "valid_targets_mean": 1561.7, | |
| "valid_targets_min": 854 | |
| }, | |
| { | |
| "epoch": 1.031941031941032, | |
| "grad_norm": 0.8925639393927697, | |
| "learning_rate": 3.9384119447779854e-05, | |
| "loss": 0.2231, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22074860334396362, | |
| "step": 420, | |
| "valid_targets_mean": 1369.3, | |
| "valid_targets_min": 679 | |
| }, | |
| { | |
| "epoch": 1.0442260442260443, | |
| "grad_norm": 0.9339576425043729, | |
| "learning_rate": 3.934841166063919e-05, | |
| "loss": 0.2192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2140847146511078, | |
| "step": 425, | |
| "valid_targets_mean": 1252.8, | |
| "valid_targets_min": 555 | |
| }, | |
| { | |
| "epoch": 1.0565110565110565, | |
| "grad_norm": 0.991777417206008, | |
| "learning_rate": 3.931171481242625e-05, | |
| "loss": 0.2091, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20439454913139343, | |
| "step": 430, | |
| "valid_targets_mean": 1281.4, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 1.0687960687960687, | |
| "grad_norm": 0.9829713522278857, | |
| "learning_rate": 3.927403077902753e-05, | |
| "loss": 0.2128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22258663177490234, | |
| "step": 435, | |
| "valid_targets_mean": 1247.5, | |
| "valid_targets_min": 696 | |
| }, | |
| { | |
| "epoch": 1.0810810810810811, | |
| "grad_norm": 0.9211041506370193, | |
| "learning_rate": 3.9235361486792905e-05, | |
| "loss": 0.2234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23965992033481598, | |
| "step": 440, | |
| "valid_targets_mean": 1353.1, | |
| "valid_targets_min": 859 | |
| }, | |
| { | |
| "epoch": 1.0933660933660934, | |
| "grad_norm": 0.9045362893017017, | |
| "learning_rate": 3.9195708912437176e-05, | |
| "loss": 0.2168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19926020503044128, | |
| "step": 445, | |
| "valid_targets_mean": 1291.4, | |
| "valid_targets_min": 832 | |
| }, | |
| { | |
| "epoch": 1.1056511056511056, | |
| "grad_norm": 0.8191233471895205, | |
| "learning_rate": 3.915507508293901e-05, | |
| "loss": 0.2135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2224387228488922, | |
| "step": 450, | |
| "valid_targets_mean": 1466.1, | |
| "valid_targets_min": 928 | |
| }, | |
| { | |
| "epoch": 1.117936117936118, | |
| "grad_norm": 0.8904732814162032, | |
| "learning_rate": 3.911346207543734e-05, | |
| "loss": 0.2098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2164279818534851, | |
| "step": 455, | |
| "valid_targets_mean": 1386.1, | |
| "valid_targets_min": 676 | |
| }, | |
| { | |
| "epoch": 1.1302211302211302, | |
| "grad_norm": 0.8232258390131152, | |
| "learning_rate": 3.907087201712515e-05, | |
| "loss": 0.2337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23600783944129944, | |
| "step": 460, | |
| "valid_targets_mean": 1351.2, | |
| "valid_targets_min": 516 | |
| }, | |
| { | |
| "epoch": 1.1425061425061425, | |
| "grad_norm": 0.9160925371017917, | |
| "learning_rate": 3.902730708514078e-05, | |
| "loss": 0.2077, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19188517332077026, | |
| "step": 465, | |
| "valid_targets_mean": 1237.4, | |
| "valid_targets_min": 707 | |
| }, | |
| { | |
| "epoch": 1.154791154791155, | |
| "grad_norm": 0.9653077975742607, | |
| "learning_rate": 3.8982769506456616e-05, | |
| "loss": 0.2132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23154853284358978, | |
| "step": 470, | |
| "valid_targets_mean": 1205.5, | |
| "valid_targets_min": 633 | |
| }, | |
| { | |
| "epoch": 1.1670761670761671, | |
| "grad_norm": 1.032558733345922, | |
| "learning_rate": 3.893726155776524e-05, | |
| "loss": 0.2171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20028898119926453, | |
| "step": 475, | |
| "valid_targets_mean": 1293.6, | |
| "valid_targets_min": 552 | |
| }, | |
| { | |
| "epoch": 1.1793611793611793, | |
| "grad_norm": 0.8648734673118065, | |
| "learning_rate": 3.8890785565363046e-05, | |
| "loss": 0.2102, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21280765533447266, | |
| "step": 480, | |
| "valid_targets_mean": 1276.3, | |
| "valid_targets_min": 772 | |
| }, | |
| { | |
| "epoch": 1.1916461916461916, | |
| "grad_norm": 0.9140586243501523, | |
| "learning_rate": 3.884334390503136e-05, | |
| "loss": 0.2192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21797388792037964, | |
| "step": 485, | |
| "valid_targets_mean": 1113.1, | |
| "valid_targets_min": 623 | |
| }, | |
| { | |
| "epoch": 1.203931203931204, | |
| "grad_norm": 1.657437936084906, | |
| "learning_rate": 3.8794939001914955e-05, | |
| "loss": 0.2141, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2240276336669922, | |
| "step": 490, | |
| "valid_targets_mean": 1444.7, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 1.2162162162162162, | |
| "grad_norm": 2.51468660321448, | |
| "learning_rate": 3.87455733303981e-05, | |
| "loss": 0.2179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22258678078651428, | |
| "step": 495, | |
| "valid_targets_mean": 1192.3, | |
| "valid_targets_min": 559 | |
| }, | |
| { | |
| "epoch": 1.2285012285012284, | |
| "grad_norm": 0.8804600759036063, | |
| "learning_rate": 3.869524941397805e-05, | |
| "loss": 0.2148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21639510989189148, | |
| "step": 500, | |
| "valid_targets_mean": 1267.6, | |
| "valid_targets_min": 702 | |
| }, | |
| { | |
| "epoch": 1.2407862407862407, | |
| "grad_norm": 0.8467225453383569, | |
| "learning_rate": 3.8643969825136095e-05, | |
| "loss": 0.2163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22254712879657745, | |
| "step": 505, | |
| "valid_targets_mean": 1189.6, | |
| "valid_targets_min": 599 | |
| }, | |
| { | |
| "epoch": 1.253071253071253, | |
| "grad_norm": 0.8462758268826358, | |
| "learning_rate": 3.8591737185206024e-05, | |
| "loss": 0.2155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2053638994693756, | |
| "step": 510, | |
| "valid_targets_mean": 1360.9, | |
| "valid_targets_min": 680 | |
| }, | |
| { | |
| "epoch": 1.2653562653562653, | |
| "grad_norm": 1.0215895978299556, | |
| "learning_rate": 3.853855416424011e-05, | |
| "loss": 0.2152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22390325367450714, | |
| "step": 515, | |
| "valid_targets_mean": 1262.0, | |
| "valid_targets_min": 666 | |
| }, | |
| { | |
| "epoch": 1.2776412776412776, | |
| "grad_norm": 0.8147207599642337, | |
| "learning_rate": 3.848442348087267e-05, | |
| "loss": 0.2126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19952106475830078, | |
| "step": 520, | |
| "valid_targets_mean": 1425.7, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 1.28992628992629, | |
| "grad_norm": 0.9742459909301096, | |
| "learning_rate": 3.842934790218106e-05, | |
| "loss": 0.2101, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21026363968849182, | |
| "step": 525, | |
| "valid_targets_mean": 1308.6, | |
| "valid_targets_min": 856 | |
| }, | |
| { | |
| "epoch": 1.3022113022113022, | |
| "grad_norm": 0.9740173370833528, | |
| "learning_rate": 3.837333024354422e-05, | |
| "loss": 0.2148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23334957659244537, | |
| "step": 530, | |
| "valid_targets_mean": 1284.8, | |
| "valid_targets_min": 563 | |
| }, | |
| { | |
| "epoch": 1.3144963144963144, | |
| "grad_norm": 0.8187427157224925, | |
| "learning_rate": 3.8316373368498794e-05, | |
| "loss": 0.1986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2131049931049347, | |
| "step": 535, | |
| "valid_targets_mean": 1468.4, | |
| "valid_targets_min": 604 | |
| }, | |
| { | |
| "epoch": 1.3267813267813269, | |
| "grad_norm": 0.8304282368649618, | |
| "learning_rate": 3.82584801885927e-05, | |
| "loss": 0.2173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21338972449302673, | |
| "step": 540, | |
| "valid_targets_mean": 1441.9, | |
| "valid_targets_min": 695 | |
| }, | |
| { | |
| "epoch": 1.339066339066339, | |
| "grad_norm": 0.7991155005432057, | |
| "learning_rate": 3.8199653663236336e-05, | |
| "loss": 0.2197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2283744215965271, | |
| "step": 545, | |
| "valid_targets_mean": 1425.4, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 1.3513513513513513, | |
| "grad_norm": 0.8159559141426797, | |
| "learning_rate": 3.813989679955128e-05, | |
| "loss": 0.2107, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20721521973609924, | |
| "step": 550, | |
| "valid_targets_mean": 1368.8, | |
| "valid_targets_min": 826 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 0.8292495885563592, | |
| "learning_rate": 3.8079212652216595e-05, | |
| "loss": 0.205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2114328145980835, | |
| "step": 555, | |
| "valid_targets_mean": 1369.9, | |
| "valid_targets_min": 602 | |
| }, | |
| { | |
| "epoch": 1.375921375921376, | |
| "grad_norm": 0.7791485862684727, | |
| "learning_rate": 3.8017604323312616e-05, | |
| "loss": 0.2085, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20806100964546204, | |
| "step": 560, | |
| "valid_targets_mean": 1351.6, | |
| "valid_targets_min": 717 | |
| }, | |
| { | |
| "epoch": 1.3882063882063882, | |
| "grad_norm": 1.3149893845062905, | |
| "learning_rate": 3.795507496216246e-05, | |
| "loss": 0.2216, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23105943202972412, | |
| "step": 565, | |
| "valid_targets_mean": 1307.3, | |
| "valid_targets_min": 755 | |
| }, | |
| { | |
| "epoch": 1.4004914004914004, | |
| "grad_norm": 0.8570970940886774, | |
| "learning_rate": 3.789162776517098e-05, | |
| "loss": 0.2132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21551501750946045, | |
| "step": 570, | |
| "valid_targets_mean": 1304.6, | |
| "valid_targets_min": 677 | |
| }, | |
| { | |
| "epoch": 1.4127764127764126, | |
| "grad_norm": 0.7883552689352117, | |
| "learning_rate": 3.78272659756614e-05, | |
| "loss": 0.2184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.233804389834404, | |
| "step": 575, | |
| "valid_targets_mean": 1437.6, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 1.425061425061425, | |
| "grad_norm": 0.8485054939608575, | |
| "learning_rate": 3.776199288370948e-05, | |
| "loss": 0.2191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2317577749490738, | |
| "step": 580, | |
| "valid_targets_mean": 1419.3, | |
| "valid_targets_min": 851 | |
| }, | |
| { | |
| "epoch": 1.4373464373464373, | |
| "grad_norm": 0.8821221515373305, | |
| "learning_rate": 3.7695811825975386e-05, | |
| "loss": 0.207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.196481391787529, | |
| "step": 585, | |
| "valid_targets_mean": 1047.1, | |
| "valid_targets_min": 571 | |
| }, | |
| { | |
| "epoch": 1.4496314496314495, | |
| "grad_norm": 0.8065062953487124, | |
| "learning_rate": 3.76287261855331e-05, | |
| "loss": 0.2081, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20574833452701569, | |
| "step": 590, | |
| "valid_targets_mean": 1468.5, | |
| "valid_targets_min": 906 | |
| }, | |
| { | |
| "epoch": 1.461916461916462, | |
| "grad_norm": 0.7440073520773791, | |
| "learning_rate": 3.7560739391697465e-05, | |
| "loss": 0.216, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18903203308582306, | |
| "step": 595, | |
| "valid_targets_mean": 1226.5, | |
| "valid_targets_min": 600 | |
| }, | |
| { | |
| "epoch": 1.4742014742014742, | |
| "grad_norm": 0.9226667751453849, | |
| "learning_rate": 3.749185491984891e-05, | |
| "loss": 0.2114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2006608545780182, | |
| "step": 600, | |
| "valid_targets_mean": 1265.7, | |
| "valid_targets_min": 772 | |
| }, | |
| { | |
| "epoch": 1.4864864864864864, | |
| "grad_norm": 0.8332504503435176, | |
| "learning_rate": 3.7422076291255785e-05, | |
| "loss": 0.1991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19059884548187256, | |
| "step": 605, | |
| "valid_targets_mean": 1247.5, | |
| "valid_targets_min": 677 | |
| }, | |
| { | |
| "epoch": 1.4987714987714988, | |
| "grad_norm": 0.8346547105403617, | |
| "learning_rate": 3.7351407072894356e-05, | |
| "loss": 0.2093, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20191192626953125, | |
| "step": 610, | |
| "valid_targets_mean": 1286.6, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 1.511056511056511, | |
| "grad_norm": 0.8216354598584255, | |
| "learning_rate": 3.7279850877266486e-05, | |
| "loss": 0.2215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22761741280555725, | |
| "step": 615, | |
| "valid_targets_mean": 1334.2, | |
| "valid_targets_min": 727 | |
| }, | |
| { | |
| "epoch": 1.5233415233415233, | |
| "grad_norm": 0.8103032517780977, | |
| "learning_rate": 3.720741136221491e-05, | |
| "loss": 0.208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21197880804538727, | |
| "step": 620, | |
| "valid_targets_mean": 1292.1, | |
| "valid_targets_min": 704 | |
| }, | |
| { | |
| "epoch": 1.5356265356265357, | |
| "grad_norm": 0.9603067451110899, | |
| "learning_rate": 3.713409223073636e-05, | |
| "loss": 0.217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21192476153373718, | |
| "step": 625, | |
| "valid_targets_mean": 1296.1, | |
| "valid_targets_min": 501 | |
| }, | |
| { | |
| "epoch": 1.547911547911548, | |
| "grad_norm": 0.8108512331739913, | |
| "learning_rate": 3.705989723079214e-05, | |
| "loss": 0.2179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2274690717458725, | |
| "step": 630, | |
| "valid_targets_mean": 1526.4, | |
| "valid_targets_min": 516 | |
| }, | |
| { | |
| "epoch": 1.5601965601965602, | |
| "grad_norm": 0.797600412989892, | |
| "learning_rate": 3.698483015511665e-05, | |
| "loss": 0.2117, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21435728669166565, | |
| "step": 635, | |
| "valid_targets_mean": 1382.8, | |
| "valid_targets_min": 608 | |
| }, | |
| { | |
| "epoch": 1.5724815724815726, | |
| "grad_norm": 0.7784701082877536, | |
| "learning_rate": 3.690889484102344e-05, | |
| "loss": 0.2084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19009949266910553, | |
| "step": 640, | |
| "valid_targets_mean": 1198.1, | |
| "valid_targets_min": 587 | |
| }, | |
| { | |
| "epoch": 1.5847665847665846, | |
| "grad_norm": 0.8204636918022205, | |
| "learning_rate": 3.683209517020908e-05, | |
| "loss": 0.2107, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22691360116004944, | |
| "step": 645, | |
| "valid_targets_mean": 1355.8, | |
| "valid_targets_min": 678 | |
| }, | |
| { | |
| "epoch": 1.597051597051597, | |
| "grad_norm": 0.8647533597450896, | |
| "learning_rate": 3.675443506855473e-05, | |
| "loss": 0.2139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25139498710632324, | |
| "step": 650, | |
| "valid_targets_mean": 1355.8, | |
| "valid_targets_min": 749 | |
| }, | |
| { | |
| "epoch": 1.6093366093366095, | |
| "grad_norm": 0.807649928860617, | |
| "learning_rate": 3.6675918505925456e-05, | |
| "loss": 0.2125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1932116448879242, | |
| "step": 655, | |
| "valid_targets_mean": 1209.3, | |
| "valid_targets_min": 622 | |
| }, | |
| { | |
| "epoch": 1.6216216216216215, | |
| "grad_norm": 0.8409035864952312, | |
| "learning_rate": 3.6596549495967276e-05, | |
| "loss": 0.2121, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22172348201274872, | |
| "step": 660, | |
| "valid_targets_mean": 1284.0, | |
| "valid_targets_min": 749 | |
| }, | |
| { | |
| "epoch": 1.633906633906634, | |
| "grad_norm": 0.7604429301405407, | |
| "learning_rate": 3.651633209590202e-05, | |
| "loss": 0.2131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1825447380542755, | |
| "step": 665, | |
| "valid_targets_mean": 1283.6, | |
| "valid_targets_min": 532 | |
| }, | |
| { | |
| "epoch": 1.6461916461916462, | |
| "grad_norm": 0.7659901458428126, | |
| "learning_rate": 3.6435270406319914e-05, | |
| "loss": 0.2277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21672038733959198, | |
| "step": 670, | |
| "valid_targets_mean": 1435.8, | |
| "valid_targets_min": 691 | |
| }, | |
| { | |
| "epoch": 1.6584766584766584, | |
| "grad_norm": 0.8053182561114196, | |
| "learning_rate": 3.635336857096997e-05, | |
| "loss": 0.2101, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20726902782917023, | |
| "step": 675, | |
| "valid_targets_mean": 1312.5, | |
| "valid_targets_min": 863 | |
| }, | |
| { | |
| "epoch": 1.6707616707616708, | |
| "grad_norm": 0.8185865614740763, | |
| "learning_rate": 3.627063077654815e-05, | |
| "loss": 0.2111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20119062066078186, | |
| "step": 680, | |
| "valid_targets_mean": 1363.4, | |
| "valid_targets_min": 538 | |
| }, | |
| { | |
| "epoch": 1.683046683046683, | |
| "grad_norm": 0.807018959146467, | |
| "learning_rate": 3.618706125248337e-05, | |
| "loss": 0.2055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20590078830718994, | |
| "step": 685, | |
| "valid_targets_mean": 1409.5, | |
| "valid_targets_min": 747 | |
| }, | |
| { | |
| "epoch": 1.6953316953316953, | |
| "grad_norm": 0.8830545181819663, | |
| "learning_rate": 3.6102664270721275e-05, | |
| "loss": 0.2167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21380965411663055, | |
| "step": 690, | |
| "valid_targets_mean": 1345.3, | |
| "valid_targets_min": 655 | |
| }, | |
| { | |
| "epoch": 1.7076167076167077, | |
| "grad_norm": 0.7052732587601278, | |
| "learning_rate": 3.601744414550589e-05, | |
| "loss": 0.2072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2043401002883911, | |
| "step": 695, | |
| "valid_targets_mean": 1457.3, | |
| "valid_targets_min": 563 | |
| }, | |
| { | |
| "epoch": 1.71990171990172, | |
| "grad_norm": 0.8372550112027383, | |
| "learning_rate": 3.593140523315906e-05, | |
| "loss": 0.2152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21121618151664734, | |
| "step": 700, | |
| "valid_targets_mean": 1263.1, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 1.7321867321867321, | |
| "grad_norm": 0.8350785545217613, | |
| "learning_rate": 3.584455193185778e-05, | |
| "loss": 0.2148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20538601279258728, | |
| "step": 705, | |
| "valid_targets_mean": 1293.8, | |
| "valid_targets_min": 641 | |
| }, | |
| { | |
| "epoch": 1.7444717444717446, | |
| "grad_norm": 0.8295482471386844, | |
| "learning_rate": 3.575688868140933e-05, | |
| "loss": 0.2194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20931079983711243, | |
| "step": 710, | |
| "valid_targets_mean": 1464.4, | |
| "valid_targets_min": 796 | |
| }, | |
| { | |
| "epoch": 1.7567567567567568, | |
| "grad_norm": 0.7660683583076219, | |
| "learning_rate": 3.566841996302438e-05, | |
| "loss": 0.2078, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20046433806419373, | |
| "step": 715, | |
| "valid_targets_mean": 1362.8, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 1.769041769041769, | |
| "grad_norm": 0.7881595588783534, | |
| "learning_rate": 3.557915029908787e-05, | |
| "loss": 0.2151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2258824110031128, | |
| "step": 720, | |
| "valid_targets_mean": 1335.9, | |
| "valid_targets_min": 775 | |
| }, | |
| { | |
| "epoch": 1.7813267813267815, | |
| "grad_norm": 0.7860239413116547, | |
| "learning_rate": 3.548908425292784e-05, | |
| "loss": 0.2095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.200752854347229, | |
| "step": 725, | |
| "valid_targets_mean": 1222.9, | |
| "valid_targets_min": 765 | |
| }, | |
| { | |
| "epoch": 1.7936117936117935, | |
| "grad_norm": 0.7979127231384053, | |
| "learning_rate": 3.5398226428582165e-05, | |
| "loss": 0.2052, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19026988744735718, | |
| "step": 730, | |
| "valid_targets_mean": 1225.9, | |
| "valid_targets_min": 576 | |
| }, | |
| { | |
| "epoch": 1.805896805896806, | |
| "grad_norm": 0.7454421136872221, | |
| "learning_rate": 3.530658147056321e-05, | |
| "loss": 0.2068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20991122722625732, | |
| "step": 735, | |
| "valid_targets_mean": 1544.0, | |
| "valid_targets_min": 748 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 0.8340078250686125, | |
| "learning_rate": 3.521415406362041e-05, | |
| "loss": 0.2101, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22280725836753845, | |
| "step": 740, | |
| "valid_targets_mean": 1295.1, | |
| "valid_targets_min": 695 | |
| }, | |
| { | |
| "epoch": 1.8304668304668303, | |
| "grad_norm": 0.8016642773200651, | |
| "learning_rate": 3.512094893250076e-05, | |
| "loss": 0.2112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1997978836297989, | |
| "step": 745, | |
| "valid_targets_mean": 1146.8, | |
| "valid_targets_min": 592 | |
| }, | |
| { | |
| "epoch": 1.8427518427518428, | |
| "grad_norm": 0.8390872499629237, | |
| "learning_rate": 3.5026970841707366e-05, | |
| "loss": 0.2093, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22235485911369324, | |
| "step": 750, | |
| "valid_targets_mean": 1444.1, | |
| "valid_targets_min": 936 | |
| }, | |
| { | |
| "epoch": 1.855036855036855, | |
| "grad_norm": 1.0764313618991608, | |
| "learning_rate": 3.493222459525579e-05, | |
| "loss": 0.1994, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20376664400100708, | |
| "step": 755, | |
| "valid_targets_mean": 1232.6, | |
| "valid_targets_min": 750 | |
| }, | |
| { | |
| "epoch": 1.8673218673218672, | |
| "grad_norm": 0.8168638944580988, | |
| "learning_rate": 3.483671503642858e-05, | |
| "loss": 0.206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2079996019601822, | |
| "step": 760, | |
| "valid_targets_mean": 1327.8, | |
| "valid_targets_min": 681 | |
| }, | |
| { | |
| "epoch": 1.8796068796068797, | |
| "grad_norm": 0.745738380390571, | |
| "learning_rate": 3.474044704752761e-05, | |
| "loss": 0.2112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20893272757530212, | |
| "step": 765, | |
| "valid_targets_mean": 1293.1, | |
| "valid_targets_min": 868 | |
| }, | |
| { | |
| "epoch": 1.8918918918918919, | |
| "grad_norm": 0.7475308186925751, | |
| "learning_rate": 3.464342554962454e-05, | |
| "loss": 0.1995, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20738467574119568, | |
| "step": 770, | |
| "valid_targets_mean": 1729.1, | |
| "valid_targets_min": 1076 | |
| }, | |
| { | |
| "epoch": 1.904176904176904, | |
| "grad_norm": 0.7371270174891834, | |
| "learning_rate": 3.4545655502309254e-05, | |
| "loss": 0.2025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18743681907653809, | |
| "step": 775, | |
| "valid_targets_mean": 1302.1, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 1.9164619164619165, | |
| "grad_norm": 0.7708714538466114, | |
| "learning_rate": 3.444714190343633e-05, | |
| "loss": 0.2079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20575056970119476, | |
| "step": 780, | |
| "valid_targets_mean": 1459.2, | |
| "valid_targets_min": 655 | |
| }, | |
| { | |
| "epoch": 1.9287469287469288, | |
| "grad_norm": 0.7874679056207637, | |
| "learning_rate": 3.434788978886957e-05, | |
| "loss": 0.21, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2159155160188675, | |
| "step": 785, | |
| "valid_targets_mean": 1388.4, | |
| "valid_targets_min": 582 | |
| }, | |
| { | |
| "epoch": 1.941031941031941, | |
| "grad_norm": 0.7905885440727076, | |
| "learning_rate": 3.424790423222455e-05, | |
| "loss": 0.1956, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.205259770154953, | |
| "step": 790, | |
| "valid_targets_mean": 1329.3, | |
| "valid_targets_min": 578 | |
| }, | |
| { | |
| "epoch": 1.9533169533169534, | |
| "grad_norm": 0.8308257005767463, | |
| "learning_rate": 3.414719034460928e-05, | |
| "loss": 0.1996, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20556990802288055, | |
| "step": 795, | |
| "valid_targets_mean": 1332.4, | |
| "valid_targets_min": 854 | |
| }, | |
| { | |
| "epoch": 1.9656019656019657, | |
| "grad_norm": 0.9472206223883171, | |
| "learning_rate": 3.404575327436294e-05, | |
| "loss": 0.2025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1957746148109436, | |
| "step": 800, | |
| "valid_targets_mean": 1276.2, | |
| "valid_targets_min": 620 | |
| }, | |
| { | |
| "epoch": 1.9778869778869779, | |
| "grad_norm": 0.7903292632563971, | |
| "learning_rate": 3.3943598206792665e-05, | |
| "loss": 0.1988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19310012459754944, | |
| "step": 805, | |
| "valid_targets_mean": 1181.9, | |
| "valid_targets_min": 665 | |
| }, | |
| { | |
| "epoch": 1.9901719901719903, | |
| "grad_norm": 0.7852111938671402, | |
| "learning_rate": 3.384073036390857e-05, | |
| "loss": 0.2021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20907938480377197, | |
| "step": 810, | |
| "valid_targets_mean": 1480.4, | |
| "valid_targets_min": 720 | |
| }, | |
| { | |
| "epoch": 2.0024570024570023, | |
| "grad_norm": 0.7468870769648174, | |
| "learning_rate": 3.373715500415667e-05, | |
| "loss": 0.2026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17850954830646515, | |
| "step": 815, | |
| "valid_targets_mean": 1413.7, | |
| "valid_targets_min": 758 | |
| }, | |
| { | |
| "epoch": 2.0147420147420148, | |
| "grad_norm": 0.924740949773517, | |
| "learning_rate": 3.363287742215023e-05, | |
| "loss": 0.1737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17179608345031738, | |
| "step": 820, | |
| "valid_targets_mean": 1223.9, | |
| "valid_targets_min": 806 | |
| }, | |
| { | |
| "epoch": 2.027027027027027, | |
| "grad_norm": 0.8331884170720032, | |
| "learning_rate": 3.352790294839898e-05, | |
| "loss": 0.1844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17778846621513367, | |
| "step": 825, | |
| "valid_targets_mean": 1208.3, | |
| "valid_targets_min": 608 | |
| }, | |
| { | |
| "epoch": 2.039312039312039, | |
| "grad_norm": 0.8511286144490444, | |
| "learning_rate": 3.3422236949036726e-05, | |
| "loss": 0.1774, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18581384420394897, | |
| "step": 830, | |
| "valid_targets_mean": 1115.2, | |
| "valid_targets_min": 527 | |
| }, | |
| { | |
| "epoch": 2.0515970515970516, | |
| "grad_norm": 0.8211112790781864, | |
| "learning_rate": 3.331588482554697e-05, | |
| "loss": 0.1785, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1763433814048767, | |
| "step": 835, | |
| "valid_targets_mean": 1319.5, | |
| "valid_targets_min": 753 | |
| }, | |
| { | |
| "epoch": 2.063882063882064, | |
| "grad_norm": 0.8620117378217113, | |
| "learning_rate": 3.320885201448684e-05, | |
| "loss": 0.1828, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17555469274520874, | |
| "step": 840, | |
| "valid_targets_mean": 1287.9, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 2.076167076167076, | |
| "grad_norm": 0.8885688434846413, | |
| "learning_rate": 3.310114398720917e-05, | |
| "loss": 0.1824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17867690324783325, | |
| "step": 845, | |
| "valid_targets_mean": 1190.2, | |
| "valid_targets_min": 713 | |
| }, | |
| { | |
| "epoch": 2.0884520884520885, | |
| "grad_norm": 0.8297047108202436, | |
| "learning_rate": 3.299276624958281e-05, | |
| "loss": 0.1858, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19857417047023773, | |
| "step": 850, | |
| "valid_targets_mean": 1453.6, | |
| "valid_targets_min": 672 | |
| }, | |
| { | |
| "epoch": 2.100737100737101, | |
| "grad_norm": 0.8911031125113326, | |
| "learning_rate": 3.288372434171116e-05, | |
| "loss": 0.1773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17555376887321472, | |
| "step": 855, | |
| "valid_targets_mean": 1394.6, | |
| "valid_targets_min": 742 | |
| }, | |
| { | |
| "epoch": 2.113022113022113, | |
| "grad_norm": 0.941251664327944, | |
| "learning_rate": 3.2774023837648986e-05, | |
| "loss": 0.1849, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.202108234167099, | |
| "step": 860, | |
| "valid_targets_mean": 1273.4, | |
| "valid_targets_min": 591 | |
| }, | |
| { | |
| "epoch": 2.1253071253071254, | |
| "grad_norm": 0.84372875880664, | |
| "learning_rate": 3.26636703451175e-05, | |
| "loss": 0.1813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16926707327365875, | |
| "step": 865, | |
| "valid_targets_mean": 1109.5, | |
| "valid_targets_min": 536 | |
| }, | |
| { | |
| "epoch": 2.1375921375921374, | |
| "grad_norm": 0.8209078079641056, | |
| "learning_rate": 3.2552669505217646e-05, | |
| "loss": 0.1829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18738217651844025, | |
| "step": 870, | |
| "valid_targets_mean": 1309.8, | |
| "valid_targets_min": 640 | |
| }, | |
| { | |
| "epoch": 2.14987714987715, | |
| "grad_norm": 0.8427561830288098, | |
| "learning_rate": 3.24410269921418e-05, | |
| "loss": 0.1797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1619742512702942, | |
| "step": 875, | |
| "valid_targets_mean": 1132.6, | |
| "valid_targets_min": 807 | |
| }, | |
| { | |
| "epoch": 2.1621621621621623, | |
| "grad_norm": 0.933209325917652, | |
| "learning_rate": 3.232874851288367e-05, | |
| "loss": 0.1825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17029553651809692, | |
| "step": 880, | |
| "valid_targets_mean": 1280.0, | |
| "valid_targets_min": 706 | |
| }, | |
| { | |
| "epoch": 2.1744471744471743, | |
| "grad_norm": 0.8767962392652733, | |
| "learning_rate": 3.221583980694659e-05, | |
| "loss": 0.1829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17228546738624573, | |
| "step": 885, | |
| "valid_targets_mean": 1235.9, | |
| "valid_targets_min": 742 | |
| }, | |
| { | |
| "epoch": 2.1867321867321867, | |
| "grad_norm": 0.8090321149992089, | |
| "learning_rate": 3.21023066460501e-05, | |
| "loss": 0.1745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1814998984336853, | |
| "step": 890, | |
| "valid_targets_mean": 1525.5, | |
| "valid_targets_min": 826 | |
| }, | |
| { | |
| "epoch": 2.199017199017199, | |
| "grad_norm": 0.8273796132984166, | |
| "learning_rate": 3.198815483383492e-05, | |
| "loss": 0.1818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1752960979938507, | |
| "step": 895, | |
| "valid_targets_mean": 1178.8, | |
| "valid_targets_min": 474 | |
| }, | |
| { | |
| "epoch": 2.211302211302211, | |
| "grad_norm": 0.8866159558691586, | |
| "learning_rate": 3.1873390205566295e-05, | |
| "loss": 0.1838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19467955827713013, | |
| "step": 900, | |
| "valid_targets_mean": 1225.6, | |
| "valid_targets_min": 647 | |
| }, | |
| { | |
| "epoch": 2.2235872235872236, | |
| "grad_norm": 0.8092185926804433, | |
| "learning_rate": 3.175801862783565e-05, | |
| "loss": 0.1827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19313837587833405, | |
| "step": 905, | |
| "valid_targets_mean": 1512.1, | |
| "valid_targets_min": 762 | |
| }, | |
| { | |
| "epoch": 2.235872235872236, | |
| "grad_norm": 0.8589507163546664, | |
| "learning_rate": 3.164204599826077e-05, | |
| "loss": 0.1808, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18503150343894958, | |
| "step": 910, | |
| "valid_targets_mean": 1290.1, | |
| "valid_targets_min": 707 | |
| }, | |
| { | |
| "epoch": 2.248157248157248, | |
| "grad_norm": 0.8026604075367092, | |
| "learning_rate": 3.1525478245184245e-05, | |
| "loss": 0.1784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2060866802930832, | |
| "step": 915, | |
| "valid_targets_mean": 1385.6, | |
| "valid_targets_min": 760 | |
| }, | |
| { | |
| "epoch": 2.2604422604422605, | |
| "grad_norm": 0.7483684778968921, | |
| "learning_rate": 3.140832132737051e-05, | |
| "loss": 0.1765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1796853095293045, | |
| "step": 920, | |
| "valid_targets_mean": 1393.4, | |
| "valid_targets_min": 745 | |
| }, | |
| { | |
| "epoch": 2.2727272727272725, | |
| "grad_norm": 0.7287292647036364, | |
| "learning_rate": 3.129058123370116e-05, | |
| "loss": 0.1814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1785249412059784, | |
| "step": 925, | |
| "valid_targets_mean": 1463.0, | |
| "valid_targets_min": 806 | |
| }, | |
| { | |
| "epoch": 2.285012285012285, | |
| "grad_norm": 0.9897658153998916, | |
| "learning_rate": 3.117226398286887e-05, | |
| "loss": 0.1761, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1785978078842163, | |
| "step": 930, | |
| "valid_targets_mean": 1234.6, | |
| "valid_targets_min": 637 | |
| }, | |
| { | |
| "epoch": 2.2972972972972974, | |
| "grad_norm": 0.7954803428561056, | |
| "learning_rate": 3.105337562306968e-05, | |
| "loss": 0.1865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18902160227298737, | |
| "step": 935, | |
| "valid_targets_mean": 1597.8, | |
| "valid_targets_min": 939 | |
| }, | |
| { | |
| "epoch": 2.30958230958231, | |
| "grad_norm": 0.8103352348732381, | |
| "learning_rate": 3.0933922231693854e-05, | |
| "loss": 0.1802, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1915901005268097, | |
| "step": 940, | |
| "valid_targets_mean": 1381.9, | |
| "valid_targets_min": 768 | |
| }, | |
| { | |
| "epoch": 2.321867321867322, | |
| "grad_norm": 0.8400259293479042, | |
| "learning_rate": 3.08139099150152e-05, | |
| "loss": 0.1891, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19923238456249237, | |
| "step": 945, | |
| "valid_targets_mean": 1377.6, | |
| "valid_targets_min": 902 | |
| }, | |
| { | |
| "epoch": 2.3341523341523343, | |
| "grad_norm": 0.7992977045728782, | |
| "learning_rate": 3.069334480787893e-05, | |
| "loss": 0.1807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1746300458908081, | |
| "step": 950, | |
| "valid_targets_mean": 1442.1, | |
| "valid_targets_min": 564 | |
| }, | |
| { | |
| "epoch": 2.3464373464373462, | |
| "grad_norm": 0.8147962505463162, | |
| "learning_rate": 3.057223307338806e-05, | |
| "loss": 0.1837, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18987751007080078, | |
| "step": 955, | |
| "valid_targets_mean": 1536.8, | |
| "valid_targets_min": 823 | |
| }, | |
| { | |
| "epoch": 2.3587223587223587, | |
| "grad_norm": 0.8727323456424065, | |
| "learning_rate": 3.0450580902588346e-05, | |
| "loss": 0.1772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18512067198753357, | |
| "step": 960, | |
| "valid_targets_mean": 1313.1, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 2.371007371007371, | |
| "grad_norm": 0.8132008349826025, | |
| "learning_rate": 3.032839451415182e-05, | |
| "loss": 0.1839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1768190562725067, | |
| "step": 965, | |
| "valid_targets_mean": 1203.5, | |
| "valid_targets_min": 457 | |
| }, | |
| { | |
| "epoch": 2.383292383292383, | |
| "grad_norm": 0.7343719628297339, | |
| "learning_rate": 3.0205680154058904e-05, | |
| "loss": 0.1786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17429158091545105, | |
| "step": 970, | |
| "valid_targets_mean": 1550.9, | |
| "valid_targets_min": 697 | |
| }, | |
| { | |
| "epoch": 2.3955773955773956, | |
| "grad_norm": 0.8349006168994485, | |
| "learning_rate": 3.0082444095279117e-05, | |
| "loss": 0.181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20357924699783325, | |
| "step": 975, | |
| "valid_targets_mean": 1422.4, | |
| "valid_targets_min": 708 | |
| }, | |
| { | |
| "epoch": 2.407862407862408, | |
| "grad_norm": 0.842283712167908, | |
| "learning_rate": 2.9958692637450406e-05, | |
| "loss": 0.1765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16421157121658325, | |
| "step": 980, | |
| "valid_targets_mean": 1248.3, | |
| "valid_targets_min": 643 | |
| }, | |
| { | |
| "epoch": 2.42014742014742, | |
| "grad_norm": 0.9134695840177238, | |
| "learning_rate": 2.983443210655714e-05, | |
| "loss": 0.1755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18048323690891266, | |
| "step": 985, | |
| "valid_targets_mean": 1289.1, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 2.4324324324324325, | |
| "grad_norm": 0.78277700193818, | |
| "learning_rate": 2.9709668854606706e-05, | |
| "loss": 0.1907, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19176796078681946, | |
| "step": 990, | |
| "valid_targets_mean": 1440.7, | |
| "valid_targets_min": 614 | |
| }, | |
| { | |
| "epoch": 2.444717444717445, | |
| "grad_norm": 0.7557596574571973, | |
| "learning_rate": 2.9584409259304828e-05, | |
| "loss": 0.1838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1759551763534546, | |
| "step": 995, | |
| "valid_targets_mean": 1373.0, | |
| "valid_targets_min": 770 | |
| }, | |
| { | |
| "epoch": 2.457002457002457, | |
| "grad_norm": 0.7832882732251863, | |
| "learning_rate": 2.945865972372954e-05, | |
| "loss": 0.1811, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18369890749454498, | |
| "step": 1000, | |
| "valid_targets_mean": 1467.0, | |
| "valid_targets_min": 958 | |
| }, | |
| { | |
| "epoch": 2.4692874692874693, | |
| "grad_norm": 0.7861430289740864, | |
| "learning_rate": 2.9332426676003858e-05, | |
| "loss": 0.1839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1779637336730957, | |
| "step": 1005, | |
| "valid_targets_mean": 1405.0, | |
| "valid_targets_min": 802 | |
| }, | |
| { | |
| "epoch": 2.4815724815724813, | |
| "grad_norm": 0.8111426620582084, | |
| "learning_rate": 2.920571656896722e-05, | |
| "loss": 0.179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17759689688682556, | |
| "step": 1010, | |
| "valid_targets_mean": 1309.1, | |
| "valid_targets_min": 782 | |
| }, | |
| { | |
| "epoch": 2.493857493857494, | |
| "grad_norm": 1.2589419246450955, | |
| "learning_rate": 2.907853587984558e-05, | |
| "loss": 0.1833, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1891094595193863, | |
| "step": 1015, | |
| "valid_targets_mean": 1459.9, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 2.506142506142506, | |
| "grad_norm": 0.7924359158589853, | |
| "learning_rate": 2.8950891109920333e-05, | |
| "loss": 0.1816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18550589680671692, | |
| "step": 1020, | |
| "valid_targets_mean": 1363.8, | |
| "valid_targets_min": 770 | |
| }, | |
| { | |
| "epoch": 2.5184275184275187, | |
| "grad_norm": 0.7781338013688867, | |
| "learning_rate": 2.882278878419597e-05, | |
| "loss": 0.1771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17955437302589417, | |
| "step": 1025, | |
| "valid_targets_mean": 1293.1, | |
| "valid_targets_min": 579 | |
| }, | |
| { | |
| "epoch": 2.5307125307125307, | |
| "grad_norm": 0.7625587872887897, | |
| "learning_rate": 2.8694235451066538e-05, | |
| "loss": 0.1818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1869661509990692, | |
| "step": 1030, | |
| "valid_targets_mean": 1573.0, | |
| "valid_targets_min": 856 | |
| }, | |
| { | |
| "epoch": 2.542997542997543, | |
| "grad_norm": 0.7506926555973574, | |
| "learning_rate": 2.8565237681980876e-05, | |
| "loss": 0.1739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18061798810958862, | |
| "step": 1035, | |
| "valid_targets_mean": 1425.1, | |
| "valid_targets_min": 836 | |
| }, | |
| { | |
| "epoch": 2.555282555282555, | |
| "grad_norm": 0.7378617960639904, | |
| "learning_rate": 2.843580207110672e-05, | |
| "loss": 0.1733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1678646206855774, | |
| "step": 1040, | |
| "valid_targets_mean": 1410.6, | |
| "valid_targets_min": 736 | |
| }, | |
| { | |
| "epoch": 2.5675675675675675, | |
| "grad_norm": 0.718433902732542, | |
| "learning_rate": 2.830593523499361e-05, | |
| "loss": 0.1819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18304452300071716, | |
| "step": 1045, | |
| "valid_targets_mean": 1439.1, | |
| "valid_targets_min": 771 | |
| }, | |
| { | |
| "epoch": 2.57985257985258, | |
| "grad_norm": 0.774505553162406, | |
| "learning_rate": 2.8175643812234627e-05, | |
| "loss": 0.1798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17948007583618164, | |
| "step": 1050, | |
| "valid_targets_mean": 1492.2, | |
| "valid_targets_min": 826 | |
| }, | |
| { | |
| "epoch": 2.592137592137592, | |
| "grad_norm": 0.8317776563831498, | |
| "learning_rate": 2.8044934463127108e-05, | |
| "loss": 0.182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18954980373382568, | |
| "step": 1055, | |
| "valid_targets_mean": 1261.8, | |
| "valid_targets_min": 759 | |
| }, | |
| { | |
| "epoch": 2.6044226044226044, | |
| "grad_norm": 0.8628717282980836, | |
| "learning_rate": 2.7913813869332112e-05, | |
| "loss": 0.1786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17699384689331055, | |
| "step": 1060, | |
| "valid_targets_mean": 1296.4, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 2.616707616707617, | |
| "grad_norm": 0.7714101259201903, | |
| "learning_rate": 2.7782288733532915e-05, | |
| "loss": 0.1803, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1735752522945404, | |
| "step": 1065, | |
| "valid_targets_mean": 1234.5, | |
| "valid_targets_min": 671 | |
| }, | |
| { | |
| "epoch": 2.628992628992629, | |
| "grad_norm": 0.8243093815645816, | |
| "learning_rate": 2.7650365779092346e-05, | |
| "loss": 0.1777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17274951934814453, | |
| "step": 1070, | |
| "valid_targets_mean": 1255.4, | |
| "valid_targets_min": 632 | |
| }, | |
| { | |
| "epoch": 2.6412776412776413, | |
| "grad_norm": 0.853204791453013, | |
| "learning_rate": 2.751805174970912e-05, | |
| "loss": 0.1831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1960735321044922, | |
| "step": 1075, | |
| "valid_targets_mean": 1437.3, | |
| "valid_targets_min": 724 | |
| }, | |
| { | |
| "epoch": 2.6535626535626538, | |
| "grad_norm": 0.8709085225146835, | |
| "learning_rate": 2.7385353409073093e-05, | |
| "loss": 0.184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1868162602186203, | |
| "step": 1080, | |
| "valid_targets_mean": 1224.4, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 2.6658476658476657, | |
| "grad_norm": 0.8182092732134453, | |
| "learning_rate": 2.725227754051953e-05, | |
| "loss": 0.1829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1859986037015915, | |
| "step": 1085, | |
| "valid_targets_mean": 1351.3, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 2.678132678132678, | |
| "grad_norm": 0.8089978851708691, | |
| "learning_rate": 2.711883094668234e-05, | |
| "loss": 0.1762, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16891303658485413, | |
| "step": 1090, | |
| "valid_targets_mean": 1228.1, | |
| "valid_targets_min": 571 | |
| }, | |
| { | |
| "epoch": 2.69041769041769, | |
| "grad_norm": 0.7864069301434866, | |
| "learning_rate": 2.698502044914633e-05, | |
| "loss": 0.1737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17922261357307434, | |
| "step": 1095, | |
| "valid_targets_mean": 1298.4, | |
| "valid_targets_min": 814 | |
| }, | |
| { | |
| "epoch": 2.7027027027027026, | |
| "grad_norm": 0.8397618594581016, | |
| "learning_rate": 2.685085288809853e-05, | |
| "loss": 0.1807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1880117654800415, | |
| "step": 1100, | |
| "valid_targets_mean": 1326.9, | |
| "valid_targets_min": 763 | |
| }, | |
| { | |
| "epoch": 2.714987714987715, | |
| "grad_norm": 0.8106546898654485, | |
| "learning_rate": 2.671633512197848e-05, | |
| "loss": 0.184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18558436632156372, | |
| "step": 1105, | |
| "valid_targets_mean": 1511.4, | |
| "valid_targets_min": 1088 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 0.7423832954434214, | |
| "learning_rate": 2.658147402712768e-05, | |
| "loss": 0.1797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18136043846607208, | |
| "step": 1110, | |
| "valid_targets_mean": 1476.5, | |
| "valid_targets_min": 968 | |
| }, | |
| { | |
| "epoch": 2.7395577395577395, | |
| "grad_norm": 0.8827736567647659, | |
| "learning_rate": 2.6446276497438064e-05, | |
| "loss": 0.178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1853279173374176, | |
| "step": 1115, | |
| "valid_targets_mean": 1164.2, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 2.751842751842752, | |
| "grad_norm": 0.8530921123592021, | |
| "learning_rate": 2.6310749443999593e-05, | |
| "loss": 0.1848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19187986850738525, | |
| "step": 1120, | |
| "valid_targets_mean": 1148.2, | |
| "valid_targets_min": 782 | |
| }, | |
| { | |
| "epoch": 2.764127764127764, | |
| "grad_norm": 0.8668139337353052, | |
| "learning_rate": 2.617489979474699e-05, | |
| "loss": 0.1799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18601025640964508, | |
| "step": 1125, | |
| "valid_targets_mean": 1221.2, | |
| "valid_targets_min": 620 | |
| }, | |
| { | |
| "epoch": 2.7764127764127764, | |
| "grad_norm": 0.7450617060660113, | |
| "learning_rate": 2.6038734494105562e-05, | |
| "loss": 0.183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1915477216243744, | |
| "step": 1130, | |
| "valid_targets_mean": 1645.6, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 2.788697788697789, | |
| "grad_norm": 1.0709919375671835, | |
| "learning_rate": 2.590226050263625e-05, | |
| "loss": 0.1741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17151376605033875, | |
| "step": 1135, | |
| "valid_targets_mean": 1232.6, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 2.800982800982801, | |
| "grad_norm": 0.9395916215750874, | |
| "learning_rate": 2.5765484796679768e-05, | |
| "loss": 0.1775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19401276111602783, | |
| "step": 1140, | |
| "valid_targets_mean": 1251.2, | |
| "valid_targets_min": 674 | |
| }, | |
| { | |
| "epoch": 2.8132678132678133, | |
| "grad_norm": 0.77766862539295, | |
| "learning_rate": 2.5628414368000035e-05, | |
| "loss": 0.1839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17841924726963043, | |
| "step": 1145, | |
| "valid_targets_mean": 1480.1, | |
| "valid_targets_min": 784 | |
| }, | |
| { | |
| "epoch": 2.8255528255528253, | |
| "grad_norm": 0.7357963334117039, | |
| "learning_rate": 2.5491056223426746e-05, | |
| "loss": 0.1854, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18812662363052368, | |
| "step": 1150, | |
| "valid_targets_mean": 1456.2, | |
| "valid_targets_min": 873 | |
| }, | |
| { | |
| "epoch": 2.8378378378378377, | |
| "grad_norm": 0.8719247967657947, | |
| "learning_rate": 2.5353417384497166e-05, | |
| "loss": 0.1795, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18741199374198914, | |
| "step": 1155, | |
| "valid_targets_mean": 1129.2, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 2.85012285012285, | |
| "grad_norm": 0.8560317512664247, | |
| "learning_rate": 2.5215504887097243e-05, | |
| "loss": 0.1834, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21094030141830444, | |
| "step": 1160, | |
| "valid_targets_mean": 1318.6, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 2.8624078624078626, | |
| "grad_norm": 0.7688854447408984, | |
| "learning_rate": 2.5077325781101918e-05, | |
| "loss": 0.1833, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1742001473903656, | |
| "step": 1165, | |
| "valid_targets_mean": 1339.2, | |
| "valid_targets_min": 582 | |
| }, | |
| { | |
| "epoch": 2.8746928746928746, | |
| "grad_norm": 0.8580920147933981, | |
| "learning_rate": 2.493888713001476e-05, | |
| "loss": 0.1802, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18028295040130615, | |
| "step": 1170, | |
| "valid_targets_mean": 1284.4, | |
| "valid_targets_min": 691 | |
| }, | |
| { | |
| "epoch": 2.886977886977887, | |
| "grad_norm": 0.8777985054766075, | |
| "learning_rate": 2.480019601060687e-05, | |
| "loss": 0.1789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1970350742340088, | |
| "step": 1175, | |
| "valid_targets_mean": 1382.4, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 2.899262899262899, | |
| "grad_norm": 0.748951092611668, | |
| "learning_rate": 2.4661259512555176e-05, | |
| "loss": 0.1808, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1815527379512787, | |
| "step": 1180, | |
| "valid_targets_mean": 1400.8, | |
| "valid_targets_min": 1080 | |
| }, | |
| { | |
| "epoch": 2.9115479115479115, | |
| "grad_norm": 0.8266596371485131, | |
| "learning_rate": 2.4522084738079933e-05, | |
| "loss": 0.1782, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19076308608055115, | |
| "step": 1185, | |
| "valid_targets_mean": 1342.9, | |
| "valid_targets_min": 773 | |
| }, | |
| { | |
| "epoch": 2.923832923832924, | |
| "grad_norm": 0.8086591628783545, | |
| "learning_rate": 2.4382678801581762e-05, | |
| "loss": 0.1759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16497498750686646, | |
| "step": 1190, | |
| "valid_targets_mean": 1346.6, | |
| "valid_targets_min": 633 | |
| }, | |
| { | |
| "epoch": 2.9361179361179364, | |
| "grad_norm": 0.7371686211674034, | |
| "learning_rate": 2.4243048829277916e-05, | |
| "loss": 0.1844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16869500279426575, | |
| "step": 1195, | |
| "valid_targets_mean": 1398.8, | |
| "valid_targets_min": 615 | |
| }, | |
| { | |
| "epoch": 2.9484029484029484, | |
| "grad_norm": 0.850105094932572, | |
| "learning_rate": 2.410320195883802e-05, | |
| "loss": 0.173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18150202929973602, | |
| "step": 1200, | |
| "valid_targets_mean": 1261.6, | |
| "valid_targets_min": 576 | |
| }, | |
| { | |
| "epoch": 2.960687960687961, | |
| "grad_norm": 0.8008511400738234, | |
| "learning_rate": 2.396314533901918e-05, | |
| "loss": 0.1783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17443349957466125, | |
| "step": 1205, | |
| "valid_targets_mean": 1265.3, | |
| "valid_targets_min": 605 | |
| }, | |
| { | |
| "epoch": 2.972972972972973, | |
| "grad_norm": 0.7979152549472199, | |
| "learning_rate": 2.3822886129300603e-05, | |
| "loss": 0.1808, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18702928721904755, | |
| "step": 1210, | |
| "valid_targets_mean": 1393.0, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 2.9852579852579852, | |
| "grad_norm": 0.8882030454470853, | |
| "learning_rate": 2.368243149951755e-05, | |
| "loss": 0.1829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18368935585021973, | |
| "step": 1215, | |
| "valid_targets_mean": 1184.1, | |
| "valid_targets_min": 623 | |
| }, | |
| { | |
| "epoch": 2.9975429975429977, | |
| "grad_norm": 1.190171038096041, | |
| "learning_rate": 2.3541788629494865e-05, | |
| "loss": 0.1823, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17229348421096802, | |
| "step": 1220, | |
| "valid_targets_mean": 1348.9, | |
| "valid_targets_min": 635 | |
| }, | |
| { | |
| "epoch": 3.0098280098280097, | |
| "grad_norm": 0.7307342908181303, | |
| "learning_rate": 2.3400964708679944e-05, | |
| "loss": 0.1541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1386469602584839, | |
| "step": 1225, | |
| "valid_targets_mean": 1282.5, | |
| "valid_targets_min": 651 | |
| }, | |
| { | |
| "epoch": 3.022113022113022, | |
| "grad_norm": 0.8613928990233313, | |
| "learning_rate": 2.325996693577522e-05, | |
| "loss": 0.1586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1562415510416031, | |
| "step": 1230, | |
| "valid_targets_mean": 1422.1, | |
| "valid_targets_min": 799 | |
| }, | |
| { | |
| "epoch": 3.0343980343980346, | |
| "grad_norm": 0.8621373770097498, | |
| "learning_rate": 2.311880251837019e-05, | |
| "loss": 0.1531, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15122167766094208, | |
| "step": 1235, | |
| "valid_targets_mean": 1260.6, | |
| "valid_targets_min": 696 | |
| }, | |
| { | |
| "epoch": 3.0466830466830466, | |
| "grad_norm": 0.8300892116809224, | |
| "learning_rate": 2.2977478672572933e-05, | |
| "loss": 0.1546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15844044089317322, | |
| "step": 1240, | |
| "valid_targets_mean": 1366.7, | |
| "valid_targets_min": 666 | |
| }, | |
| { | |
| "epoch": 3.058968058968059, | |
| "grad_norm": 0.7976485770535675, | |
| "learning_rate": 2.2836002622641297e-05, | |
| "loss": 0.1504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16169382631778717, | |
| "step": 1245, | |
| "valid_targets_mean": 1357.0, | |
| "valid_targets_min": 695 | |
| }, | |
| { | |
| "epoch": 3.0712530712530715, | |
| "grad_norm": 0.8216349889588943, | |
| "learning_rate": 2.269438160061354e-05, | |
| "loss": 0.1537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14585554599761963, | |
| "step": 1250, | |
| "valid_targets_mean": 1295.6, | |
| "valid_targets_min": 810 | |
| }, | |
| { | |
| "epoch": 3.0835380835380835, | |
| "grad_norm": 0.8346060124980336, | |
| "learning_rate": 2.2552622845938698e-05, | |
| "loss": 0.1562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1618095487356186, | |
| "step": 1255, | |
| "valid_targets_mean": 1529.2, | |
| "valid_targets_min": 857 | |
| }, | |
| { | |
| "epoch": 3.095823095823096, | |
| "grad_norm": 0.8726538534903174, | |
| "learning_rate": 2.2410733605106462e-05, | |
| "loss": 0.1578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1641627848148346, | |
| "step": 1260, | |
| "valid_targets_mean": 1204.4, | |
| "valid_targets_min": 604 | |
| }, | |
| { | |
| "epoch": 3.108108108108108, | |
| "grad_norm": 0.8397356110687504, | |
| "learning_rate": 2.2268721131276805e-05, | |
| "loss": 0.1523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15571998059749603, | |
| "step": 1265, | |
| "valid_targets_mean": 1444.3, | |
| "valid_targets_min": 747 | |
| }, | |
| { | |
| "epoch": 3.1203931203931203, | |
| "grad_norm": 0.885409483392303, | |
| "learning_rate": 2.2126592683909154e-05, | |
| "loss": 0.1504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15786957740783691, | |
| "step": 1270, | |
| "valid_targets_mean": 1228.3, | |
| "valid_targets_min": 614 | |
| }, | |
| { | |
| "epoch": 3.1326781326781328, | |
| "grad_norm": 0.8308229666253899, | |
| "learning_rate": 2.1984355528391342e-05, | |
| "loss": 0.1532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15812982618808746, | |
| "step": 1275, | |
| "valid_targets_mean": 1397.4, | |
| "valid_targets_min": 758 | |
| }, | |
| { | |
| "epoch": 3.1449631449631448, | |
| "grad_norm": 0.8798682108503517, | |
| "learning_rate": 2.1842016935668188e-05, | |
| "loss": 0.1525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1703539490699768, | |
| "step": 1280, | |
| "valid_targets_mean": 1452.6, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 3.157248157248157, | |
| "grad_norm": 0.8714179652930998, | |
| "learning_rate": 2.169958418186982e-05, | |
| "loss": 0.1618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15718436241149902, | |
| "step": 1285, | |
| "valid_targets_mean": 1294.4, | |
| "valid_targets_min": 738 | |
| }, | |
| { | |
| "epoch": 3.1695331695331697, | |
| "grad_norm": 0.8091176874493534, | |
| "learning_rate": 2.1557064547939754e-05, | |
| "loss": 0.1579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14591258764266968, | |
| "step": 1290, | |
| "valid_targets_mean": 1266.2, | |
| "valid_targets_min": 810 | |
| }, | |
| { | |
| "epoch": 3.1818181818181817, | |
| "grad_norm": 0.7781842247742055, | |
| "learning_rate": 2.1414465319262666e-05, | |
| "loss": 0.1575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14745178818702698, | |
| "step": 1295, | |
| "valid_targets_mean": 1345.4, | |
| "valid_targets_min": 681 | |
| }, | |
| { | |
| "epoch": 3.194103194103194, | |
| "grad_norm": 0.7649531239864251, | |
| "learning_rate": 2.1271793785291997e-05, | |
| "loss": 0.15, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14022435247898102, | |
| "step": 1300, | |
| "valid_targets_mean": 1342.6, | |
| "valid_targets_min": 602 | |
| }, | |
| { | |
| "epoch": 3.2063882063882065, | |
| "grad_norm": 0.8883698563769529, | |
| "learning_rate": 2.1129057239177337e-05, | |
| "loss": 0.1577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1590563952922821, | |
| "step": 1305, | |
| "valid_targets_mean": 1261.3, | |
| "valid_targets_min": 748 | |
| }, | |
| { | |
| "epoch": 3.2186732186732185, | |
| "grad_norm": 0.8407539759944053, | |
| "learning_rate": 2.0986262977391577e-05, | |
| "loss": 0.1548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15316829085350037, | |
| "step": 1310, | |
| "valid_targets_mean": 1228.0, | |
| "valid_targets_min": 728 | |
| }, | |
| { | |
| "epoch": 3.230958230958231, | |
| "grad_norm": 0.8493111864349429, | |
| "learning_rate": 2.084341829935796e-05, | |
| "loss": 0.1536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16053800284862518, | |
| "step": 1315, | |
| "valid_targets_mean": 1279.6, | |
| "valid_targets_min": 664 | |
| }, | |
| { | |
| "epoch": 3.2432432432432434, | |
| "grad_norm": 0.7978902338142638, | |
| "learning_rate": 2.0700530507076916e-05, | |
| "loss": 0.1471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14525769650936127, | |
| "step": 1320, | |
| "valid_targets_mean": 1242.2, | |
| "valid_targets_min": 807 | |
| }, | |
| { | |
| "epoch": 3.2555282555282554, | |
| "grad_norm": 0.8688719746475381, | |
| "learning_rate": 2.0557606904752833e-05, | |
| "loss": 0.1536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15545523166656494, | |
| "step": 1325, | |
| "valid_targets_mean": 1201.7, | |
| "valid_targets_min": 474 | |
| }, | |
| { | |
| "epoch": 3.267813267813268, | |
| "grad_norm": 0.8057273667346481, | |
| "learning_rate": 2.0414654798420622e-05, | |
| "loss": 0.1452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1586393266916275, | |
| "step": 1330, | |
| "valid_targets_mean": 1397.9, | |
| "valid_targets_min": 772 | |
| }, | |
| { | |
| "epoch": 3.2800982800982803, | |
| "grad_norm": 0.816284599146468, | |
| "learning_rate": 2.02716814955723e-05, | |
| "loss": 0.1557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14593248069286346, | |
| "step": 1335, | |
| "valid_targets_mean": 1254.7, | |
| "valid_targets_min": 597 | |
| }, | |
| { | |
| "epoch": 3.2923832923832923, | |
| "grad_norm": 0.8571051831976274, | |
| "learning_rate": 2.0128694304783406e-05, | |
| "loss": 0.1569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15648102760314941, | |
| "step": 1340, | |
| "valid_targets_mean": 1235.6, | |
| "valid_targets_min": 730 | |
| }, | |
| { | |
| "epoch": 3.3046683046683047, | |
| "grad_norm": 0.9070549086061275, | |
| "learning_rate": 1.9985700535339406e-05, | |
| "loss": 0.1595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15327666699886322, | |
| "step": 1345, | |
| "valid_targets_mean": 1168.9, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 3.3169533169533167, | |
| "grad_norm": 0.7807560780429057, | |
| "learning_rate": 1.984270749686207e-05, | |
| "loss": 0.1552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13311003148555756, | |
| "step": 1350, | |
| "valid_targets_mean": 1288.8, | |
| "valid_targets_min": 536 | |
| }, | |
| { | |
| "epoch": 3.329238329238329, | |
| "grad_norm": 0.865620018859759, | |
| "learning_rate": 1.9699722498935786e-05, | |
| "loss": 0.1542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15494976937770844, | |
| "step": 1355, | |
| "valid_targets_mean": 1236.1, | |
| "valid_targets_min": 563 | |
| }, | |
| { | |
| "epoch": 3.3415233415233416, | |
| "grad_norm": 0.7924135892447138, | |
| "learning_rate": 1.9556752850733933e-05, | |
| "loss": 0.1522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1397174596786499, | |
| "step": 1360, | |
| "valid_targets_mean": 1311.5, | |
| "valid_targets_min": 706 | |
| }, | |
| { | |
| "epoch": 3.3538083538083536, | |
| "grad_norm": 0.8814662356463719, | |
| "learning_rate": 1.9413805860645242e-05, | |
| "loss": 0.1623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17522946000099182, | |
| "step": 1365, | |
| "valid_targets_mean": 1309.7, | |
| "valid_targets_min": 692 | |
| }, | |
| { | |
| "epoch": 3.366093366093366, | |
| "grad_norm": 0.8496535117552688, | |
| "learning_rate": 1.9270888835900165e-05, | |
| "loss": 0.163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16273535788059235, | |
| "step": 1370, | |
| "valid_targets_mean": 1285.9, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 3.3783783783783785, | |
| "grad_norm": 0.8307356132401741, | |
| "learning_rate": 1.9128009082197417e-05, | |
| "loss": 0.1514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14840710163116455, | |
| "step": 1375, | |
| "valid_targets_mean": 1197.5, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 3.3906633906633905, | |
| "grad_norm": 0.7936007911158947, | |
| "learning_rate": 1.8985173903330428e-05, | |
| "loss": 0.1534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1535969078540802, | |
| "step": 1380, | |
| "valid_targets_mean": 1328.5, | |
| "valid_targets_min": 494 | |
| }, | |
| { | |
| "epoch": 3.402948402948403, | |
| "grad_norm": 0.8258515892814148, | |
| "learning_rate": 1.884239060081407e-05, | |
| "loss": 0.1455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14214763045310974, | |
| "step": 1385, | |
| "valid_targets_mean": 1239.3, | |
| "valid_targets_min": 808 | |
| }, | |
| { | |
| "epoch": 3.4152334152334154, | |
| "grad_norm": 0.8438817045136049, | |
| "learning_rate": 1.869966647351135e-05, | |
| "loss": 0.1555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14923638105392456, | |
| "step": 1390, | |
| "valid_targets_mean": 1271.1, | |
| "valid_targets_min": 667 | |
| }, | |
| { | |
| "epoch": 3.4275184275184274, | |
| "grad_norm": 0.7888928161251536, | |
| "learning_rate": 1.8557008817260343e-05, | |
| "loss": 0.1561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14702829718589783, | |
| "step": 1395, | |
| "valid_targets_mean": 1379.3, | |
| "valid_targets_min": 871 | |
| }, | |
| { | |
| "epoch": 3.43980343980344, | |
| "grad_norm": 0.8289520041059483, | |
| "learning_rate": 1.8414424924501222e-05, | |
| "loss": 0.1615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14874780178070068, | |
| "step": 1400, | |
| "valid_targets_mean": 1413.8, | |
| "valid_targets_min": 681 | |
| }, | |
| { | |
| "epoch": 3.4520884520884523, | |
| "grad_norm": 0.8175357142071188, | |
| "learning_rate": 1.827192208390347e-05, | |
| "loss": 0.149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14862152934074402, | |
| "step": 1405, | |
| "valid_targets_mean": 1303.1, | |
| "valid_targets_min": 532 | |
| }, | |
| { | |
| "epoch": 3.4643734643734643, | |
| "grad_norm": 0.8283358330296076, | |
| "learning_rate": 1.812950757999334e-05, | |
| "loss": 0.1579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14382848143577576, | |
| "step": 1410, | |
| "valid_targets_mean": 1249.4, | |
| "valid_targets_min": 516 | |
| }, | |
| { | |
| "epoch": 3.4766584766584767, | |
| "grad_norm": 0.8311916600493378, | |
| "learning_rate": 1.7987188692781417e-05, | |
| "loss": 0.1512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1518518328666687, | |
| "step": 1415, | |
| "valid_targets_mean": 1345.8, | |
| "valid_targets_min": 828 | |
| }, | |
| { | |
| "epoch": 3.488943488943489, | |
| "grad_norm": 0.8139723233277028, | |
| "learning_rate": 1.784497269739052e-05, | |
| "loss": 0.1536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1457878053188324, | |
| "step": 1420, | |
| "valid_targets_mean": 1384.6, | |
| "valid_targets_min": 477 | |
| }, | |
| { | |
| "epoch": 3.501228501228501, | |
| "grad_norm": 0.8996346359556405, | |
| "learning_rate": 1.770286686368381e-05, | |
| "loss": 0.1527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15588831901550293, | |
| "step": 1425, | |
| "valid_targets_mean": 1237.2, | |
| "valid_targets_min": 755 | |
| }, | |
| { | |
| "epoch": 3.5135135135135136, | |
| "grad_norm": 0.8267568396171764, | |
| "learning_rate": 1.756087845589312e-05, | |
| "loss": 0.1572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14774084091186523, | |
| "step": 1430, | |
| "valid_targets_mean": 1236.9, | |
| "valid_targets_min": 599 | |
| }, | |
| { | |
| "epoch": 3.5257985257985256, | |
| "grad_norm": 0.8823476921788762, | |
| "learning_rate": 1.7419014732247683e-05, | |
| "loss": 0.1592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16577738523483276, | |
| "step": 1435, | |
| "valid_targets_mean": 1305.9, | |
| "valid_targets_min": 736 | |
| }, | |
| { | |
| "epoch": 3.538083538083538, | |
| "grad_norm": 0.8495429903451098, | |
| "learning_rate": 1.7277282944603047e-05, | |
| "loss": 0.1624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1646609604358673, | |
| "step": 1440, | |
| "valid_targets_mean": 1440.2, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 3.5503685503685505, | |
| "grad_norm": 0.8859222575437841, | |
| "learning_rate": 1.713569033807041e-05, | |
| "loss": 0.1538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14450153708457947, | |
| "step": 1445, | |
| "valid_targets_mean": 1183.0, | |
| "valid_targets_min": 516 | |
| }, | |
| { | |
| "epoch": 3.562653562653563, | |
| "grad_norm": 0.7258677661153138, | |
| "learning_rate": 1.6994244150646244e-05, | |
| "loss": 0.1497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.136446014046669, | |
| "step": 1450, | |
| "valid_targets_mean": 1456.6, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 3.574938574938575, | |
| "grad_norm": 1.1189221159298355, | |
| "learning_rate": 1.6852951612842278e-05, | |
| "loss": 0.1537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15197353065013885, | |
| "step": 1455, | |
| "valid_targets_mean": 1427.3, | |
| "valid_targets_min": 922 | |
| }, | |
| { | |
| "epoch": 3.5872235872235874, | |
| "grad_norm": 0.7977314412730174, | |
| "learning_rate": 1.671181994731595e-05, | |
| "loss": 0.1533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14697697758674622, | |
| "step": 1460, | |
| "valid_targets_mean": 1319.3, | |
| "valid_targets_min": 755 | |
| }, | |
| { | |
| "epoch": 3.5995085995085994, | |
| "grad_norm": 0.8915240895713842, | |
| "learning_rate": 1.6570856368501108e-05, | |
| "loss": 0.1608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18134114146232605, | |
| "step": 1465, | |
| "valid_targets_mean": 1353.6, | |
| "valid_targets_min": 640 | |
| }, | |
| { | |
| "epoch": 3.611793611793612, | |
| "grad_norm": 0.8454843017740421, | |
| "learning_rate": 1.643006808223931e-05, | |
| "loss": 0.1518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1447536051273346, | |
| "step": 1470, | |
| "valid_targets_mean": 1331.5, | |
| "valid_targets_min": 576 | |
| }, | |
| { | |
| "epoch": 3.6240786240786242, | |
| "grad_norm": 0.8680883182471578, | |
| "learning_rate": 1.6289462285411387e-05, | |
| "loss": 0.1508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1568550318479538, | |
| "step": 1475, | |
| "valid_targets_mean": 1267.2, | |
| "valid_targets_min": 754 | |
| }, | |
| { | |
| "epoch": 3.6363636363636362, | |
| "grad_norm": 0.7826701135858078, | |
| "learning_rate": 1.614904616556962e-05, | |
| "loss": 0.1452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14433687925338745, | |
| "step": 1480, | |
| "valid_targets_mean": 1380.3, | |
| "valid_targets_min": 684 | |
| }, | |
| { | |
| "epoch": 3.6486486486486487, | |
| "grad_norm": 0.8760757837044783, | |
| "learning_rate": 1.6008826900570294e-05, | |
| "loss": 0.1509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14260855317115784, | |
| "step": 1485, | |
| "valid_targets_mean": 1139.2, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 3.6609336609336607, | |
| "grad_norm": 0.8606176442709622, | |
| "learning_rate": 1.586881165820675e-05, | |
| "loss": 0.1547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15521323680877686, | |
| "step": 1490, | |
| "valid_targets_mean": 1231.2, | |
| "valid_targets_min": 621 | |
| }, | |
| { | |
| "epoch": 3.673218673218673, | |
| "grad_norm": 0.8342760865015145, | |
| "learning_rate": 1.5729007595843037e-05, | |
| "loss": 0.1518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14267314970493317, | |
| "step": 1495, | |
| "valid_targets_mean": 1268.3, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 3.6855036855036856, | |
| "grad_norm": 0.7940729892895161, | |
| "learning_rate": 1.5589421860047986e-05, | |
| "loss": 0.1561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15548819303512573, | |
| "step": 1500, | |
| "valid_targets_mean": 1399.1, | |
| "valid_targets_min": 868 | |
| }, | |
| { | |
| "epoch": 3.697788697788698, | |
| "grad_norm": 0.9348949881247557, | |
| "learning_rate": 1.5450061586229903e-05, | |
| "loss": 0.1569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15649253129959106, | |
| "step": 1505, | |
| "valid_targets_mean": 1175.8, | |
| "valid_targets_min": 679 | |
| }, | |
| { | |
| "epoch": 3.71007371007371, | |
| "grad_norm": 0.9419666196048564, | |
| "learning_rate": 1.5310933898271864e-05, | |
| "loss": 0.1525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15512649714946747, | |
| "step": 1510, | |
| "valid_targets_mean": 1268.0, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 3.7223587223587224, | |
| "grad_norm": 1.2144373410107168, | |
| "learning_rate": 1.5172045908167462e-05, | |
| "loss": 0.1502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1509784758090973, | |
| "step": 1515, | |
| "valid_targets_mean": 1296.2, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 3.7346437346437344, | |
| "grad_norm": 0.8618101037036396, | |
| "learning_rate": 1.5033404715657344e-05, | |
| "loss": 0.1522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14538899064064026, | |
| "step": 1520, | |
| "valid_targets_mean": 1261.3, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 3.746928746928747, | |
| "grad_norm": 0.9088753024739327, | |
| "learning_rate": 1.4895017407866217e-05, | |
| "loss": 0.1584, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1564379334449768, | |
| "step": 1525, | |
| "valid_targets_mean": 1103.7, | |
| "valid_targets_min": 578 | |
| }, | |
| { | |
| "epoch": 3.7592137592137593, | |
| "grad_norm": 0.7941352382501426, | |
| "learning_rate": 1.4756891058940606e-05, | |
| "loss": 0.1589, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1650761067867279, | |
| "step": 1530, | |
| "valid_targets_mean": 1470.4, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 3.7714987714987718, | |
| "grad_norm": 0.8795501599277145, | |
| "learning_rate": 1.4619032729687223e-05, | |
| "loss": 0.1534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15996554493904114, | |
| "step": 1535, | |
| "valid_targets_mean": 1394.7, | |
| "valid_targets_min": 703 | |
| }, | |
| { | |
| "epoch": 3.7837837837837838, | |
| "grad_norm": 0.9087496627648661, | |
| "learning_rate": 1.4481449467212004e-05, | |
| "loss": 0.1519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1490277647972107, | |
| "step": 1540, | |
| "valid_targets_mean": 1197.9, | |
| "valid_targets_min": 661 | |
| }, | |
| { | |
| "epoch": 3.796068796068796, | |
| "grad_norm": 0.8345727778883328, | |
| "learning_rate": 1.4344148304559926e-05, | |
| "loss": 0.1523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15558528900146484, | |
| "step": 1545, | |
| "valid_targets_mean": 1394.4, | |
| "valid_targets_min": 930 | |
| }, | |
| { | |
| "epoch": 3.808353808353808, | |
| "grad_norm": 0.7993180767117398, | |
| "learning_rate": 1.4207136260355426e-05, | |
| "loss": 0.1517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1597263514995575, | |
| "step": 1550, | |
| "valid_targets_mean": 1388.5, | |
| "valid_targets_min": 844 | |
| }, | |
| { | |
| "epoch": 3.8206388206388207, | |
| "grad_norm": 0.8081116367574808, | |
| "learning_rate": 1.4070420338443667e-05, | |
| "loss": 0.1564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15166549384593964, | |
| "step": 1555, | |
| "valid_targets_mean": 1418.8, | |
| "valid_targets_min": 652 | |
| }, | |
| { | |
| "epoch": 3.832923832923833, | |
| "grad_norm": 0.7779591710227076, | |
| "learning_rate": 1.3934007527532494e-05, | |
| "loss": 0.1559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16257548332214355, | |
| "step": 1560, | |
| "valid_targets_mean": 1578.3, | |
| "valid_targets_min": 1012 | |
| }, | |
| { | |
| "epoch": 3.845208845208845, | |
| "grad_norm": 0.8215347465946208, | |
| "learning_rate": 1.3797904800835174e-05, | |
| "loss": 0.1541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15061379969120026, | |
| "step": 1565, | |
| "valid_targets_mean": 1403.3, | |
| "valid_targets_min": 680 | |
| }, | |
| { | |
| "epoch": 3.8574938574938575, | |
| "grad_norm": 0.8043563222153439, | |
| "learning_rate": 1.3662119115713968e-05, | |
| "loss": 0.1558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1362723857164383, | |
| "step": 1570, | |
| "valid_targets_mean": 1334.8, | |
| "valid_targets_min": 946 | |
| }, | |
| { | |
| "epoch": 3.8697788697788695, | |
| "grad_norm": 0.8195101713625148, | |
| "learning_rate": 1.3526657413324427e-05, | |
| "loss": 0.1493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.143699049949646, | |
| "step": 1575, | |
| "valid_targets_mean": 1394.0, | |
| "valid_targets_min": 755 | |
| }, | |
| { | |
| "epoch": 3.882063882063882, | |
| "grad_norm": 0.80460297597485, | |
| "learning_rate": 1.3391526618260636e-05, | |
| "loss": 0.1537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15249329805374146, | |
| "step": 1580, | |
| "valid_targets_mean": 1313.9, | |
| "valid_targets_min": 640 | |
| }, | |
| { | |
| "epoch": 3.8943488943488944, | |
| "grad_norm": 0.8784076776122989, | |
| "learning_rate": 1.3256733638201172e-05, | |
| "loss": 0.1528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16345198452472687, | |
| "step": 1585, | |
| "valid_targets_mean": 1215.0, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 3.906633906633907, | |
| "grad_norm": 0.7705036284083052, | |
| "learning_rate": 1.3122285363556053e-05, | |
| "loss": 0.1523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1505666971206665, | |
| "step": 1590, | |
| "valid_targets_mean": 1404.9, | |
| "valid_targets_min": 552 | |
| }, | |
| { | |
| "epoch": 3.918918918918919, | |
| "grad_norm": 0.8819744058762196, | |
| "learning_rate": 1.2988188667114487e-05, | |
| "loss": 0.1542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17144563794136047, | |
| "step": 1595, | |
| "valid_targets_mean": 1450.9, | |
| "valid_targets_min": 749 | |
| }, | |
| { | |
| "epoch": 3.9312039312039313, | |
| "grad_norm": 0.7673029734407929, | |
| "learning_rate": 1.2854450403693526e-05, | |
| "loss": 0.155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15685027837753296, | |
| "step": 1600, | |
| "valid_targets_mean": 1363.2, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 3.9434889434889433, | |
| "grad_norm": 0.817832214280846, | |
| "learning_rate": 1.272107740978769e-05, | |
| "loss": 0.1554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14822739362716675, | |
| "step": 1605, | |
| "valid_targets_mean": 1326.2, | |
| "valid_targets_min": 612 | |
| }, | |
| { | |
| "epoch": 3.9557739557739557, | |
| "grad_norm": 0.8535781578111578, | |
| "learning_rate": 1.2588076503219475e-05, | |
| "loss": 0.1552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1607990264892578, | |
| "step": 1610, | |
| "valid_targets_mean": 1378.2, | |
| "valid_targets_min": 792 | |
| }, | |
| { | |
| "epoch": 3.968058968058968, | |
| "grad_norm": 0.7728013852762362, | |
| "learning_rate": 1.2455454482790859e-05, | |
| "loss": 0.1611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1566057652235031, | |
| "step": 1615, | |
| "valid_targets_mean": 1420.8, | |
| "valid_targets_min": 680 | |
| }, | |
| { | |
| "epoch": 3.98034398034398, | |
| "grad_norm": 0.7842204401862228, | |
| "learning_rate": 1.2323218127935714e-05, | |
| "loss": 0.1514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15891683101654053, | |
| "step": 1620, | |
| "valid_targets_mean": 1429.4, | |
| "valid_targets_min": 706 | |
| }, | |
| { | |
| "epoch": 3.9926289926289926, | |
| "grad_norm": 0.8784267189106049, | |
| "learning_rate": 1.2191374198373309e-05, | |
| "loss": 0.1538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15576815605163574, | |
| "step": 1625, | |
| "valid_targets_mean": 1184.1, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 4.004914004914005, | |
| "grad_norm": 0.7366886987096114, | |
| "learning_rate": 1.2059929433762734e-05, | |
| "loss": 0.1528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1368754506111145, | |
| "step": 1630, | |
| "valid_targets_mean": 1401.8, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 4.017199017199017, | |
| "grad_norm": 0.7933638417270233, | |
| "learning_rate": 1.1928890553358352e-05, | |
| "loss": 0.1366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13500437140464783, | |
| "step": 1635, | |
| "valid_targets_mean": 1565.2, | |
| "valid_targets_min": 987 | |
| }, | |
| { | |
| "epoch": 4.0294840294840295, | |
| "grad_norm": 0.8628014644200664, | |
| "learning_rate": 1.1798264255666387e-05, | |
| "loss": 0.1333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14415526390075684, | |
| "step": 1640, | |
| "valid_targets_mean": 1300.3, | |
| "valid_targets_min": 602 | |
| }, | |
| { | |
| "epoch": 4.041769041769042, | |
| "grad_norm": 0.8002132940148077, | |
| "learning_rate": 1.1668057218102436e-05, | |
| "loss": 0.13, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12178932875394821, | |
| "step": 1645, | |
| "valid_targets_mean": 1487.4, | |
| "valid_targets_min": 597 | |
| }, | |
| { | |
| "epoch": 4.054054054054054, | |
| "grad_norm": 0.9820127911757913, | |
| "learning_rate": 1.1538276096650175e-05, | |
| "loss": 0.1323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13494223356246948, | |
| "step": 1650, | |
| "valid_targets_mean": 1204.8, | |
| "valid_targets_min": 510 | |
| }, | |
| { | |
| "epoch": 4.066339066339066, | |
| "grad_norm": 0.8341539454673305, | |
| "learning_rate": 1.1408927525521118e-05, | |
| "loss": 0.1353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1301422119140625, | |
| "step": 1655, | |
| "valid_targets_mean": 1414.1, | |
| "valid_targets_min": 768 | |
| }, | |
| { | |
| "epoch": 4.078624078624078, | |
| "grad_norm": 0.7995201505530926, | |
| "learning_rate": 1.1280018116815438e-05, | |
| "loss": 0.1318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12719780206680298, | |
| "step": 1660, | |
| "valid_targets_mean": 1330.9, | |
| "valid_targets_min": 675 | |
| }, | |
| { | |
| "epoch": 4.090909090909091, | |
| "grad_norm": 0.910415927350699, | |
| "learning_rate": 1.115155446018404e-05, | |
| "loss": 0.1307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1261419653892517, | |
| "step": 1665, | |
| "valid_targets_mean": 1129.1, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 4.103194103194103, | |
| "grad_norm": 0.8034706735229478, | |
| "learning_rate": 1.1023543122491626e-05, | |
| "loss": 0.1328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1415213644504547, | |
| "step": 1670, | |
| "valid_targets_mean": 1496.2, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 4.115479115479116, | |
| "grad_norm": 0.866765318059242, | |
| "learning_rate": 1.089599064748108e-05, | |
| "loss": 0.1347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12806713581085205, | |
| "step": 1675, | |
| "valid_targets_mean": 1218.8, | |
| "valid_targets_min": 829 | |
| }, | |
| { | |
| "epoch": 4.127764127764128, | |
| "grad_norm": 0.8555274258633575, | |
| "learning_rate": 1.0768903555438927e-05, | |
| "loss": 0.1334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13543421030044556, | |
| "step": 1680, | |
| "valid_targets_mean": 1221.2, | |
| "valid_targets_min": 866 | |
| }, | |
| { | |
| "epoch": 4.14004914004914, | |
| "grad_norm": 0.8596973915714367, | |
| "learning_rate": 1.0642288342862007e-05, | |
| "loss": 0.1303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13252198696136475, | |
| "step": 1685, | |
| "valid_targets_mean": 1356.1, | |
| "valid_targets_min": 821 | |
| }, | |
| { | |
| "epoch": 4.152334152334152, | |
| "grad_norm": 0.9208483153787601, | |
| "learning_rate": 1.051615148212544e-05, | |
| "loss": 0.1325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13615989685058594, | |
| "step": 1690, | |
| "valid_targets_mean": 1221.8, | |
| "valid_targets_min": 673 | |
| }, | |
| { | |
| "epoch": 4.164619164619165, | |
| "grad_norm": 0.7991697418317443, | |
| "learning_rate": 1.0390499421151706e-05, | |
| "loss": 0.131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12765686213970184, | |
| "step": 1695, | |
| "valid_targets_mean": 1561.8, | |
| "valid_targets_min": 838 | |
| }, | |
| { | |
| "epoch": 4.176904176904177, | |
| "grad_norm": 0.8981370633942914, | |
| "learning_rate": 1.0265338583081088e-05, | |
| "loss": 0.1298, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13098324835300446, | |
| "step": 1700, | |
| "valid_targets_mean": 1211.4, | |
| "valid_targets_min": 644 | |
| }, | |
| { | |
| "epoch": 4.1891891891891895, | |
| "grad_norm": 0.8844665537629925, | |
| "learning_rate": 1.0140675365943284e-05, | |
| "loss": 0.1339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14116141200065613, | |
| "step": 1705, | |
| "valid_targets_mean": 1244.7, | |
| "valid_targets_min": 740 | |
| }, | |
| { | |
| "epoch": 4.201474201474202, | |
| "grad_norm": 0.8665683768372529, | |
| "learning_rate": 1.0016516142330404e-05, | |
| "loss": 0.1358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13503558933734894, | |
| "step": 1710, | |
| "valid_targets_mean": 1241.2, | |
| "valid_targets_min": 817 | |
| }, | |
| { | |
| "epoch": 4.2137592137592135, | |
| "grad_norm": 0.8749047948719851, | |
| "learning_rate": 9.89286725907117e-06, | |
| "loss": 0.1306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1354825794696808, | |
| "step": 1715, | |
| "valid_targets_mean": 1278.6, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 4.226044226044226, | |
| "grad_norm": 0.9569051006969553, | |
| "learning_rate": 9.769735036906475e-06, | |
| "loss": 0.135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13164186477661133, | |
| "step": 1720, | |
| "valid_targets_mean": 1170.8, | |
| "valid_targets_min": 686 | |
| }, | |
| { | |
| "epoch": 4.238329238329238, | |
| "grad_norm": 0.8501782042282354, | |
| "learning_rate": 9.647125770166321e-06, | |
| "loss": 0.1334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13081029057502747, | |
| "step": 1725, | |
| "valid_targets_mean": 1290.1, | |
| "valid_targets_min": 582 | |
| }, | |
| { | |
| "epoch": 4.250614250614251, | |
| "grad_norm": 0.862775977696725, | |
| "learning_rate": 9.525045726448001e-06, | |
| "loss": 0.1335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13716746866703033, | |
| "step": 1730, | |
| "valid_targets_mean": 1338.8, | |
| "valid_targets_min": 708 | |
| }, | |
| { | |
| "epoch": 4.262899262899263, | |
| "grad_norm": 0.8594823195898281, | |
| "learning_rate": 9.40350114629577e-06, | |
| "loss": 0.1318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13566257059574127, | |
| "step": 1735, | |
| "valid_targets_mean": 1207.9, | |
| "valid_targets_min": 702 | |
| }, | |
| { | |
| "epoch": 4.275184275184275, | |
| "grad_norm": 0.8957849218271997, | |
| "learning_rate": 9.282498242881784e-06, | |
| "loss": 0.1342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13811632990837097, | |
| "step": 1740, | |
| "valid_targets_mean": 1364.2, | |
| "valid_targets_min": 687 | |
| }, | |
| { | |
| "epoch": 4.287469287469287, | |
| "grad_norm": 0.7964442575263061, | |
| "learning_rate": 9.162043201688517e-06, | |
| "loss": 0.1325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1246182844042778, | |
| "step": 1745, | |
| "valid_targets_mean": 1242.6, | |
| "valid_targets_min": 695 | |
| }, | |
| { | |
| "epoch": 4.2997542997543, | |
| "grad_norm": 0.8406930413358997, | |
| "learning_rate": 9.042142180192596e-06, | |
| "loss": 0.138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13601821660995483, | |
| "step": 1750, | |
| "valid_targets_mean": 1232.2, | |
| "valid_targets_min": 750 | |
| }, | |
| { | |
| "epoch": 4.312039312039312, | |
| "grad_norm": 0.8829734298723391, | |
| "learning_rate": 8.92280130754998e-06, | |
| "loss": 0.1328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13678283989429474, | |
| "step": 1755, | |
| "valid_targets_mean": 1385.4, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 4.324324324324325, | |
| "grad_norm": 0.8513299309600417, | |
| "learning_rate": 8.804026684282694e-06, | |
| "loss": 0.1357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13562187552452087, | |
| "step": 1760, | |
| "valid_targets_mean": 1322.9, | |
| "valid_targets_min": 723 | |
| }, | |
| { | |
| "epoch": 4.336609336609337, | |
| "grad_norm": 0.9423027917320065, | |
| "learning_rate": 8.685824381966975e-06, | |
| "loss": 0.1339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14288148283958435, | |
| "step": 1765, | |
| "valid_targets_mean": 1257.4, | |
| "valid_targets_min": 661 | |
| }, | |
| { | |
| "epoch": 4.348894348894349, | |
| "grad_norm": 0.8541048898119664, | |
| "learning_rate": 8.568200442922865e-06, | |
| "loss": 0.1346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13191963732242584, | |
| "step": 1770, | |
| "valid_targets_mean": 1367.1, | |
| "valid_targets_min": 624 | |
| }, | |
| { | |
| "epoch": 4.361179361179361, | |
| "grad_norm": 0.8840745838802464, | |
| "learning_rate": 8.451160879905398e-06, | |
| "loss": 0.1342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1389164924621582, | |
| "step": 1775, | |
| "valid_targets_mean": 1295.1, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 4.3734643734643734, | |
| "grad_norm": 0.8183971167908243, | |
| "learning_rate": 8.33471167579717e-06, | |
| "loss": 0.1369, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14170622825622559, | |
| "step": 1780, | |
| "valid_targets_mean": 1438.7, | |
| "valid_targets_min": 925 | |
| }, | |
| { | |
| "epoch": 4.385749385749386, | |
| "grad_norm": 0.8393962412502597, | |
| "learning_rate": 8.218858783302566e-06, | |
| "loss": 0.1386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13835975527763367, | |
| "step": 1785, | |
| "valid_targets_mean": 1326.6, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 4.398034398034398, | |
| "grad_norm": 0.8108514526997439, | |
| "learning_rate": 8.103608124643412e-06, | |
| "loss": 0.1296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12403405457735062, | |
| "step": 1790, | |
| "valid_targets_mean": 1355.0, | |
| "valid_targets_min": 745 | |
| }, | |
| { | |
| "epoch": 4.41031941031941, | |
| "grad_norm": 0.8542346229314831, | |
| "learning_rate": 7.988965591256284e-06, | |
| "loss": 0.1399, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13956885039806366, | |
| "step": 1795, | |
| "valid_targets_mean": 1313.2, | |
| "valid_targets_min": 677 | |
| }, | |
| { | |
| "epoch": 4.422604422604422, | |
| "grad_norm": 0.8573348642882317, | |
| "learning_rate": 7.874937043491331e-06, | |
| "loss": 0.1371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13548767566680908, | |
| "step": 1800, | |
| "valid_targets_mean": 1353.4, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 4.434889434889435, | |
| "grad_norm": 0.8510677976707655, | |
| "learning_rate": 7.761528310312679e-06, | |
| "loss": 0.1364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13224107027053833, | |
| "step": 1805, | |
| "valid_targets_mean": 1344.6, | |
| "valid_targets_min": 658 | |
| }, | |
| { | |
| "epoch": 4.447174447174447, | |
| "grad_norm": 0.834522243307913, | |
| "learning_rate": 7.648745189000511e-06, | |
| "loss": 0.1297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12788425385951996, | |
| "step": 1810, | |
| "valid_targets_mean": 1313.6, | |
| "valid_targets_min": 607 | |
| }, | |
| { | |
| "epoch": 4.45945945945946, | |
| "grad_norm": 0.8706136253103205, | |
| "learning_rate": 7.536593444854663e-06, | |
| "loss": 0.1337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13793480396270752, | |
| "step": 1815, | |
| "valid_targets_mean": 1411.5, | |
| "valid_targets_min": 679 | |
| }, | |
| { | |
| "epoch": 4.471744471744472, | |
| "grad_norm": 0.9145008420730636, | |
| "learning_rate": 7.4250788108999686e-06, | |
| "loss": 0.1353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13442564010620117, | |
| "step": 1820, | |
| "valid_targets_mean": 1134.2, | |
| "valid_targets_min": 640 | |
| }, | |
| { | |
| "epoch": 4.484029484029484, | |
| "grad_norm": 0.8605459768965008, | |
| "learning_rate": 7.314206987593162e-06, | |
| "loss": 0.1406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1341979205608368, | |
| "step": 1825, | |
| "valid_targets_mean": 1223.4, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 4.496314496314496, | |
| "grad_norm": 0.8892492517359941, | |
| "learning_rate": 7.203983642531462e-06, | |
| "loss": 0.1373, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13275498151779175, | |
| "step": 1830, | |
| "valid_targets_mean": 1318.0, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 4.5085995085995085, | |
| "grad_norm": 0.9124768833778905, | |
| "learning_rate": 7.094414410162913e-06, | |
| "loss": 0.1397, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1393716186285019, | |
| "step": 1835, | |
| "valid_targets_mean": 1195.8, | |
| "valid_targets_min": 689 | |
| }, | |
| { | |
| "epoch": 4.520884520884521, | |
| "grad_norm": 0.8035581857922256, | |
| "learning_rate": 6.985504891498291e-06, | |
| "loss": 0.1333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12183650583028793, | |
| "step": 1840, | |
| "valid_targets_mean": 1309.6, | |
| "valid_targets_min": 661 | |
| }, | |
| { | |
| "epoch": 4.533169533169533, | |
| "grad_norm": 0.8705636872964952, | |
| "learning_rate": 6.8772606538248285e-06, | |
| "loss": 0.1343, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1374255120754242, | |
| "step": 1845, | |
| "valid_targets_mean": 1331.1, | |
| "valid_targets_min": 680 | |
| }, | |
| { | |
| "epoch": 4.545454545454545, | |
| "grad_norm": 0.8632896737975871, | |
| "learning_rate": 6.769687230421638e-06, | |
| "loss": 0.1351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14031383395195007, | |
| "step": 1850, | |
| "valid_targets_mean": 1431.8, | |
| "valid_targets_min": 637 | |
| }, | |
| { | |
| "epoch": 4.557739557739557, | |
| "grad_norm": 0.8377827873733562, | |
| "learning_rate": 6.662790120276803e-06, | |
| "loss": 0.139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14007632434368134, | |
| "step": 1855, | |
| "valid_targets_mean": 1524.4, | |
| "valid_targets_min": 802 | |
| }, | |
| { | |
| "epoch": 4.57002457002457, | |
| "grad_norm": 0.9293213235592381, | |
| "learning_rate": 6.556574787806344e-06, | |
| "loss": 0.1343, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14054429531097412, | |
| "step": 1860, | |
| "valid_targets_mean": 1152.1, | |
| "valid_targets_min": 578 | |
| }, | |
| { | |
| "epoch": 4.582309582309582, | |
| "grad_norm": 0.929391437997182, | |
| "learning_rate": 6.451046662574831e-06, | |
| "loss": 0.1387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13908030092716217, | |
| "step": 1865, | |
| "valid_targets_mean": 1150.8, | |
| "valid_targets_min": 640 | |
| }, | |
| { | |
| "epoch": 4.594594594594595, | |
| "grad_norm": 0.835793531051664, | |
| "learning_rate": 6.346211139017877e-06, | |
| "loss": 0.1389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1384456902742386, | |
| "step": 1870, | |
| "valid_targets_mean": 1486.2, | |
| "valid_targets_min": 684 | |
| }, | |
| { | |
| "epoch": 4.606879606879607, | |
| "grad_norm": 0.8826154929705194, | |
| "learning_rate": 6.242073576166337e-06, | |
| "loss": 0.1329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13053680956363678, | |
| "step": 1875, | |
| "valid_targets_mean": 1282.6, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 4.61916461916462, | |
| "grad_norm": 0.9065130410284179, | |
| "learning_rate": 6.138639297372404e-06, | |
| "loss": 0.1292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13762430846691132, | |
| "step": 1880, | |
| "valid_targets_mean": 1407.1, | |
| "valid_targets_min": 873 | |
| }, | |
| { | |
| "epoch": 4.631449631449631, | |
| "grad_norm": 0.8994074928815686, | |
| "learning_rate": 6.035913590037479e-06, | |
| "loss": 0.1327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13964726030826569, | |
| "step": 1885, | |
| "valid_targets_mean": 1228.9, | |
| "valid_targets_min": 726 | |
| }, | |
| { | |
| "epoch": 4.643734643734644, | |
| "grad_norm": 0.9604468903494978, | |
| "learning_rate": 5.933901705341851e-06, | |
| "loss": 0.135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1333615779876709, | |
| "step": 1890, | |
| "valid_targets_mean": 1213.6, | |
| "valid_targets_min": 582 | |
| }, | |
| { | |
| "epoch": 4.656019656019656, | |
| "grad_norm": 0.8383010896296011, | |
| "learning_rate": 5.832608857976321e-06, | |
| "loss": 0.1336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1478002667427063, | |
| "step": 1895, | |
| "valid_targets_mean": 1434.4, | |
| "valid_targets_min": 604 | |
| }, | |
| { | |
| "epoch": 4.6683046683046685, | |
| "grad_norm": 0.8805048874938732, | |
| "learning_rate": 5.732040225875584e-06, | |
| "loss": 0.1379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14286617934703827, | |
| "step": 1900, | |
| "valid_targets_mean": 1351.0, | |
| "valid_targets_min": 740 | |
| }, | |
| { | |
| "epoch": 4.680589680589681, | |
| "grad_norm": 0.8676927609925812, | |
| "learning_rate": 5.632200949953579e-06, | |
| "loss": 0.132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13202515244483948, | |
| "step": 1905, | |
| "valid_targets_mean": 1434.6, | |
| "valid_targets_min": 508 | |
| }, | |
| { | |
| "epoch": 4.6928746928746925, | |
| "grad_norm": 0.8588264500535372, | |
| "learning_rate": 5.533096133840677e-06, | |
| "loss": 0.1315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1286821961402893, | |
| "step": 1910, | |
| "valid_targets_mean": 1292.1, | |
| "valid_targets_min": 624 | |
| }, | |
| { | |
| "epoch": 4.705159705159705, | |
| "grad_norm": 0.8238480351155095, | |
| "learning_rate": 5.434730843622778e-06, | |
| "loss": 0.1298, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1256740391254425, | |
| "step": 1915, | |
| "valid_targets_mean": 1475.9, | |
| "valid_targets_min": 941 | |
| }, | |
| { | |
| "epoch": 4.717444717444717, | |
| "grad_norm": 0.7987905827701233, | |
| "learning_rate": 5.337110107582377e-06, | |
| "loss": 0.1323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13216213881969452, | |
| "step": 1920, | |
| "valid_targets_mean": 1585.5, | |
| "valid_targets_min": 933 | |
| }, | |
| { | |
| "epoch": 4.72972972972973, | |
| "grad_norm": 0.8202613091269093, | |
| "learning_rate": 5.2402389159414755e-06, | |
| "loss": 0.1352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1317969113588333, | |
| "step": 1925, | |
| "valid_targets_mean": 1359.4, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 4.742014742014742, | |
| "grad_norm": 0.8402543955369384, | |
| "learning_rate": 5.144122220606542e-06, | |
| "loss": 0.1331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12853650748729706, | |
| "step": 1930, | |
| "valid_targets_mean": 1334.2, | |
| "valid_targets_min": 836 | |
| }, | |
| { | |
| "epoch": 4.754299754299755, | |
| "grad_norm": 0.8243105925153843, | |
| "learning_rate": 5.048764934915349e-06, | |
| "loss": 0.1349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13137733936309814, | |
| "step": 1935, | |
| "valid_targets_mean": 1372.7, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 4.766584766584766, | |
| "grad_norm": 0.8551744953134909, | |
| "learning_rate": 4.954171933385805e-06, | |
| "loss": 0.1324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12337550520896912, | |
| "step": 1940, | |
| "valid_targets_mean": 1208.1, | |
| "valid_targets_min": 664 | |
| }, | |
| { | |
| "epoch": 4.778869778869779, | |
| "grad_norm": 0.8353661626353857, | |
| "learning_rate": 4.8603480514667836e-06, | |
| "loss": 0.139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13476914167404175, | |
| "step": 1945, | |
| "valid_targets_mean": 1539.8, | |
| "valid_targets_min": 1046 | |
| }, | |
| { | |
| "epoch": 4.791154791154791, | |
| "grad_norm": 0.9201701172988107, | |
| "learning_rate": 4.767298085290963e-06, | |
| "loss": 0.1421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13851451873779297, | |
| "step": 1950, | |
| "valid_targets_mean": 1200.8, | |
| "valid_targets_min": 814 | |
| }, | |
| { | |
| "epoch": 4.803439803439804, | |
| "grad_norm": 0.7903130103442203, | |
| "learning_rate": 4.675026791429624e-06, | |
| "loss": 0.1317, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13899393379688263, | |
| "step": 1955, | |
| "valid_targets_mean": 1431.2, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 4.815724815724816, | |
| "grad_norm": 1.0032408075954347, | |
| "learning_rate": 4.583538886649525e-06, | |
| "loss": 0.1286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12036003172397614, | |
| "step": 1960, | |
| "valid_targets_mean": 1207.3, | |
| "valid_targets_min": 769 | |
| }, | |
| { | |
| "epoch": 4.828009828009828, | |
| "grad_norm": 0.9056009299550104, | |
| "learning_rate": 4.492839047671764e-06, | |
| "loss": 0.1394, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1375010758638382, | |
| "step": 1965, | |
| "valid_targets_mean": 1240.6, | |
| "valid_targets_min": 716 | |
| }, | |
| { | |
| "epoch": 4.84029484029484, | |
| "grad_norm": 0.8588130580896964, | |
| "learning_rate": 4.4029319109327465e-06, | |
| "loss": 0.1351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1344630867242813, | |
| "step": 1970, | |
| "valid_targets_mean": 1349.8, | |
| "valid_targets_min": 569 | |
| }, | |
| { | |
| "epoch": 4.8525798525798525, | |
| "grad_norm": 0.858165719251599, | |
| "learning_rate": 4.313822072347136e-06, | |
| "loss": 0.1296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13171663880348206, | |
| "step": 1975, | |
| "valid_targets_mean": 1367.1, | |
| "valid_targets_min": 802 | |
| }, | |
| { | |
| "epoch": 4.864864864864865, | |
| "grad_norm": 0.875500075862758, | |
| "learning_rate": 4.22551408707296e-06, | |
| "loss": 0.1267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12783968448638916, | |
| "step": 1980, | |
| "valid_targets_mean": 1204.9, | |
| "valid_targets_min": 608 | |
| }, | |
| { | |
| "epoch": 4.877149877149877, | |
| "grad_norm": 0.9874909007998213, | |
| "learning_rate": 4.138012469278714e-06, | |
| "loss": 0.1321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12864485383033752, | |
| "step": 1985, | |
| "valid_targets_mean": 1459.3, | |
| "valid_targets_min": 754 | |
| }, | |
| { | |
| "epoch": 4.88943488943489, | |
| "grad_norm": 0.839208346024632, | |
| "learning_rate": 4.051321691912649e-06, | |
| "loss": 0.1322, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.126143679022789, | |
| "step": 1990, | |
| "valid_targets_mean": 1285.3, | |
| "valid_targets_min": 837 | |
| }, | |
| { | |
| "epoch": 4.901719901719901, | |
| "grad_norm": 0.8858250352209801, | |
| "learning_rate": 3.9654461864740935e-06, | |
| "loss": 0.1308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12833839654922485, | |
| "step": 1995, | |
| "valid_targets_mean": 1380.1, | |
| "valid_targets_min": 808 | |
| }, | |
| { | |
| "epoch": 4.914004914004914, | |
| "grad_norm": 0.8408289236361944, | |
| "learning_rate": 3.880390342786915e-06, | |
| "loss": 0.1315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13571733236312866, | |
| "step": 2000, | |
| "valid_targets_mean": 1429.1, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 4.926289926289926, | |
| "grad_norm": 0.9080478383836208, | |
| "learning_rate": 3.7961585087751516e-06, | |
| "loss": 0.1322, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12336321920156479, | |
| "step": 2005, | |
| "valid_targets_mean": 1190.2, | |
| "valid_targets_min": 655 | |
| }, | |
| { | |
| "epoch": 4.938574938574939, | |
| "grad_norm": 0.8971940449378069, | |
| "learning_rate": 3.71275499024071e-06, | |
| "loss": 0.1325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13041742146015167, | |
| "step": 2010, | |
| "valid_targets_mean": 1133.1, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 4.950859950859951, | |
| "grad_norm": 0.8880840280709703, | |
| "learning_rate": 3.6301840506433083e-06, | |
| "loss": 0.1297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1241886168718338, | |
| "step": 2015, | |
| "valid_targets_mean": 1268.0, | |
| "valid_targets_min": 672 | |
| }, | |
| { | |
| "epoch": 4.963144963144963, | |
| "grad_norm": 1.1031402665998207, | |
| "learning_rate": 3.5484499108824853e-06, | |
| "loss": 0.1353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13872548937797546, | |
| "step": 2020, | |
| "valid_targets_mean": 1167.4, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 4.975429975429975, | |
| "grad_norm": 0.8736057551200717, | |
| "learning_rate": 3.4675567490818727e-06, | |
| "loss": 0.1405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14015041291713715, | |
| "step": 2025, | |
| "valid_targets_mean": 1295.9, | |
| "valid_targets_min": 600 | |
| }, | |
| { | |
| "epoch": 4.987714987714988, | |
| "grad_norm": 0.854353824628278, | |
| "learning_rate": 3.3875087003756036e-06, | |
| "loss": 0.1334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1293148249387741, | |
| "step": 2030, | |
| "valid_targets_mean": 1316.2, | |
| "valid_targets_min": 714 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.8140342144412567, | |
| "learning_rate": 3.30830985669691e-06, | |
| "loss": 0.1312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1288040578365326, | |
| "step": 2035, | |
| "valid_targets_mean": 1413.3, | |
| "valid_targets_min": 603 | |
| }, | |
| { | |
| "epoch": 5.012285012285012, | |
| "grad_norm": 0.8658864945122418, | |
| "learning_rate": 3.22996426656899e-06, | |
| "loss": 0.1239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11617255955934525, | |
| "step": 2040, | |
| "valid_targets_mean": 1353.3, | |
| "valid_targets_min": 818 | |
| }, | |
| { | |
| "epoch": 5.024570024570025, | |
| "grad_norm": 0.8531142816529003, | |
| "learning_rate": 3.1524759348980096e-06, | |
| "loss": 0.1225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12038596719503403, | |
| "step": 2045, | |
| "valid_targets_mean": 1211.6, | |
| "valid_targets_min": 668 | |
| }, | |
| { | |
| "epoch": 5.036855036855036, | |
| "grad_norm": 0.851372884888408, | |
| "learning_rate": 3.0758488227684212e-06, | |
| "loss": 0.1224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12824995815753937, | |
| "step": 2050, | |
| "valid_targets_mean": 1328.2, | |
| "valid_targets_min": 546 | |
| }, | |
| { | |
| "epoch": 5.049140049140049, | |
| "grad_norm": 0.9706113177440796, | |
| "learning_rate": 3.0000868472404423e-06, | |
| "loss": 0.1269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11911970376968384, | |
| "step": 2055, | |
| "valid_targets_mean": 1168.4, | |
| "valid_targets_min": 666 | |
| }, | |
| { | |
| "epoch": 5.061425061425061, | |
| "grad_norm": 0.9049095253944934, | |
| "learning_rate": 2.9251938811498436e-06, | |
| "loss": 0.1232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1177496686577797, | |
| "step": 2060, | |
| "valid_targets_mean": 1182.1, | |
| "valid_targets_min": 571 | |
| }, | |
| { | |
| "epoch": 5.073710073710074, | |
| "grad_norm": 1.1566011104480272, | |
| "learning_rate": 2.8511737529099704e-06, | |
| "loss": 0.1218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1201414167881012, | |
| "step": 2065, | |
| "valid_targets_mean": 1338.1, | |
| "valid_targets_min": 741 | |
| }, | |
| { | |
| "epoch": 5.085995085995086, | |
| "grad_norm": 0.956067933937365, | |
| "learning_rate": 2.7780302463160235e-06, | |
| "loss": 0.1296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13462458550930023, | |
| "step": 2070, | |
| "valid_targets_mean": 1192.4, | |
| "valid_targets_min": 754 | |
| }, | |
| { | |
| "epoch": 5.098280098280099, | |
| "grad_norm": 0.838928942440734, | |
| "learning_rate": 2.705767100351673e-06, | |
| "loss": 0.1264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11500583589076996, | |
| "step": 2075, | |
| "valid_targets_mean": 1345.1, | |
| "valid_targets_min": 564 | |
| }, | |
| { | |
| "epoch": 5.11056511056511, | |
| "grad_norm": 0.8734381458988081, | |
| "learning_rate": 2.634388008997899e-06, | |
| "loss": 0.1277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12265495955944061, | |
| "step": 2080, | |
| "valid_targets_mean": 1291.5, | |
| "valid_targets_min": 802 | |
| }, | |
| { | |
| "epoch": 5.122850122850123, | |
| "grad_norm": 0.8413692002655475, | |
| "learning_rate": 2.5638966210441597e-06, | |
| "loss": 0.1188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11556601524353027, | |
| "step": 2085, | |
| "valid_targets_mean": 1294.2, | |
| "valid_targets_min": 680 | |
| }, | |
| { | |
| "epoch": 5.135135135135135, | |
| "grad_norm": 0.8799660549940648, | |
| "learning_rate": 2.4942965399018926e-06, | |
| "loss": 0.1244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12702292203903198, | |
| "step": 2090, | |
| "valid_targets_mean": 1269.8, | |
| "valid_targets_min": 477 | |
| }, | |
| { | |
| "epoch": 5.1474201474201475, | |
| "grad_norm": 0.8066619823829013, | |
| "learning_rate": 2.425591323420289e-06, | |
| "loss": 0.1234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1243201345205307, | |
| "step": 2095, | |
| "valid_targets_mean": 1452.9, | |
| "valid_targets_min": 725 | |
| }, | |
| { | |
| "epoch": 5.15970515970516, | |
| "grad_norm": 0.9372266194267785, | |
| "learning_rate": 2.357784483704444e-06, | |
| "loss": 0.1224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12215512990951538, | |
| "step": 2100, | |
| "valid_targets_mean": 1202.8, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 5.171990171990172, | |
| "grad_norm": 0.8830570789936427, | |
| "learning_rate": 2.2908794869358044e-06, | |
| "loss": 0.1227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12093114852905273, | |
| "step": 2105, | |
| "valid_targets_mean": 1264.2, | |
| "valid_targets_min": 669 | |
| }, | |
| { | |
| "epoch": 5.184275184275184, | |
| "grad_norm": 0.8934660083440941, | |
| "learning_rate": 2.2248797531949952e-06, | |
| "loss": 0.1233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12020882964134216, | |
| "step": 2110, | |
| "valid_targets_mean": 1310.5, | |
| "valid_targets_min": 600 | |
| }, | |
| { | |
| "epoch": 5.196560196560196, | |
| "grad_norm": 0.8707949672125694, | |
| "learning_rate": 2.1597886562869917e-06, | |
| "loss": 0.1257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12943926453590393, | |
| "step": 2115, | |
| "valid_targets_mean": 1316.6, | |
| "valid_targets_min": 609 | |
| }, | |
| { | |
| "epoch": 5.208845208845209, | |
| "grad_norm": 0.8234797789294522, | |
| "learning_rate": 2.095609523568638e-06, | |
| "loss": 0.1192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11528286337852478, | |
| "step": 2120, | |
| "valid_targets_mean": 1418.0, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 5.221130221130221, | |
| "grad_norm": 0.9194881267999129, | |
| "learning_rate": 2.0323456357785855e-06, | |
| "loss": 0.1247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11602406203746796, | |
| "step": 2125, | |
| "valid_targets_mean": 1233.6, | |
| "valid_targets_min": 736 | |
| }, | |
| { | |
| "epoch": 5.233415233415234, | |
| "grad_norm": 0.8122022599016506, | |
| "learning_rate": 1.970000226869553e-06, | |
| "loss": 0.1205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11653412133455276, | |
| "step": 2130, | |
| "valid_targets_mean": 1303.5, | |
| "valid_targets_min": 698 | |
| }, | |
| { | |
| "epoch": 5.245700245700245, | |
| "grad_norm": 0.923088217673824, | |
| "learning_rate": 1.90857648384305e-06, | |
| "loss": 0.1206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11754988133907318, | |
| "step": 2135, | |
| "valid_targets_mean": 1166.8, | |
| "valid_targets_min": 849 | |
| }, | |
| { | |
| "epoch": 5.257985257985258, | |
| "grad_norm": 0.861527753194058, | |
| "learning_rate": 1.848077546586431e-06, | |
| "loss": 0.1199, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12186920642852783, | |
| "step": 2140, | |
| "valid_targets_mean": 1300.3, | |
| "valid_targets_min": 585 | |
| }, | |
| { | |
| "epoch": 5.27027027027027, | |
| "grad_norm": 0.9201608696103838, | |
| "learning_rate": 1.7885065077123976e-06, | |
| "loss": 0.1249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11818292737007141, | |
| "step": 2145, | |
| "valid_targets_mean": 1208.6, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 5.282555282555283, | |
| "grad_norm": 0.923690558438998, | |
| "learning_rate": 1.7298664124009245e-06, | |
| "loss": 0.1264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12545941770076752, | |
| "step": 2150, | |
| "valid_targets_mean": 1258.4, | |
| "valid_targets_min": 774 | |
| }, | |
| { | |
| "epoch": 5.294840294840295, | |
| "grad_norm": 0.9367718219612289, | |
| "learning_rate": 1.672160258243567e-06, | |
| "loss": 0.1191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11731995642185211, | |
| "step": 2155, | |
| "valid_targets_mean": 1109.1, | |
| "valid_targets_min": 577 | |
| }, | |
| { | |
| "epoch": 5.3071253071253075, | |
| "grad_norm": 0.9004332610521443, | |
| "learning_rate": 1.615390995090258e-06, | |
| "loss": 0.1199, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12512671947479248, | |
| "step": 2160, | |
| "valid_targets_mean": 1176.1, | |
| "valid_targets_min": 716 | |
| }, | |
| { | |
| "epoch": 5.319410319410319, | |
| "grad_norm": 0.8970853902420122, | |
| "learning_rate": 1.559561524898492e-06, | |
| "loss": 0.1227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12076780945062637, | |
| "step": 2165, | |
| "valid_targets_mean": 1176.6, | |
| "valid_targets_min": 697 | |
| }, | |
| { | |
| "epoch": 5.3316953316953315, | |
| "grad_norm": 0.8961964820085028, | |
| "learning_rate": 1.5046747015849893e-06, | |
| "loss": 0.1187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12210316956043243, | |
| "step": 2170, | |
| "valid_targets_mean": 1287.6, | |
| "valid_targets_min": 943 | |
| }, | |
| { | |
| "epoch": 5.343980343980344, | |
| "grad_norm": 0.9040366744497016, | |
| "learning_rate": 1.4507333308798255e-06, | |
| "loss": 0.1254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11274679005146027, | |
| "step": 2175, | |
| "valid_targets_mean": 1213.7, | |
| "valid_targets_min": 680 | |
| }, | |
| { | |
| "epoch": 5.356265356265356, | |
| "grad_norm": 0.7970843543346068, | |
| "learning_rate": 1.3977401701829752e-06, | |
| "loss": 0.1161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11382180452346802, | |
| "step": 2180, | |
| "valid_targets_mean": 1512.9, | |
| "valid_targets_min": 673 | |
| }, | |
| { | |
| "epoch": 5.368550368550369, | |
| "grad_norm": 0.8740938323727337, | |
| "learning_rate": 1.345697928423384e-06, | |
| "loss": 0.1237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12005269527435303, | |
| "step": 2185, | |
| "valid_targets_mean": 1355.6, | |
| "valid_targets_min": 841 | |
| }, | |
| { | |
| "epoch": 5.38083538083538, | |
| "grad_norm": 0.9170510151174336, | |
| "learning_rate": 1.2946092659204767e-06, | |
| "loss": 0.1234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12003514170646667, | |
| "step": 2190, | |
| "valid_targets_mean": 1087.2, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 5.393120393120393, | |
| "grad_norm": 0.9207520601904955, | |
| "learning_rate": 1.244476794248175e-06, | |
| "loss": 0.1229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12629008293151855, | |
| "step": 2195, | |
| "valid_targets_mean": 1249.9, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 5.405405405405405, | |
| "grad_norm": 0.9726658004552319, | |
| "learning_rate": 1.1953030761014017e-06, | |
| "loss": 0.1204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1273987889289856, | |
| "step": 2200, | |
| "valid_targets_mean": 1098.0, | |
| "valid_targets_min": 587 | |
| }, | |
| { | |
| "epoch": 5.417690417690418, | |
| "grad_norm": 0.8602382921139939, | |
| "learning_rate": 1.147090625165055e-06, | |
| "loss": 0.1205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11596332490444183, | |
| "step": 2205, | |
| "valid_targets_mean": 1287.6, | |
| "valid_targets_min": 650 | |
| }, | |
| { | |
| "epoch": 5.42997542997543, | |
| "grad_norm": 0.8511123882579387, | |
| "learning_rate": 1.0998419059855503e-06, | |
| "loss": 0.1253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11091361939907074, | |
| "step": 2210, | |
| "valid_targets_mean": 1244.9, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 5.442260442260443, | |
| "grad_norm": 0.9311466380954836, | |
| "learning_rate": 1.053559333844798e-06, | |
| "loss": 0.1213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12396387755870819, | |
| "step": 2215, | |
| "valid_targets_mean": 1159.0, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 5.454545454545454, | |
| "grad_norm": 0.8785434056532222, | |
| "learning_rate": 1.0082452746367721e-06, | |
| "loss": 0.1196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1240609884262085, | |
| "step": 2220, | |
| "valid_targets_mean": 1384.5, | |
| "valid_targets_min": 695 | |
| }, | |
| { | |
| "epoch": 5.466830466830467, | |
| "grad_norm": 0.8130432819933735, | |
| "learning_rate": 9.639020447465475e-07, | |
| "loss": 0.1241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12418870627880096, | |
| "step": 2225, | |
| "valid_targets_mean": 1512.6, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 5.479115479115479, | |
| "grad_norm": 1.0657851830704053, | |
| "learning_rate": 9.205319109318922e-07, | |
| "loss": 0.1193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11516118049621582, | |
| "step": 2230, | |
| "valid_targets_mean": 1181.9, | |
| "valid_targets_min": 692 | |
| }, | |
| { | |
| "epoch": 5.4914004914004915, | |
| "grad_norm": 0.8116861807457518, | |
| "learning_rate": 8.781370902074049e-07, | |
| "loss": 0.1225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11950664222240448, | |
| "step": 2235, | |
| "valid_targets_mean": 1536.9, | |
| "valid_targets_min": 789 | |
| }, | |
| { | |
| "epoch": 5.503685503685504, | |
| "grad_norm": 0.8069319101293884, | |
| "learning_rate": 8.367197497311719e-07, | |
| "loss": 0.1236, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11634531617164612, | |
| "step": 2240, | |
| "valid_targets_mean": 1464.1, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 5.515970515970516, | |
| "grad_norm": 0.9168477141212379, | |
| "learning_rate": 7.962820066939958e-07, | |
| "loss": 0.1259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12269000709056854, | |
| "step": 2245, | |
| "valid_targets_mean": 1244.7, | |
| "valid_targets_min": 651 | |
| }, | |
| { | |
| "epoch": 5.528255528255528, | |
| "grad_norm": 0.9154886310694959, | |
| "learning_rate": 7.568259282111645e-07, | |
| "loss": 0.123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12214012444019318, | |
| "step": 2250, | |
| "valid_targets_mean": 1273.2, | |
| "valid_targets_min": 525 | |
| }, | |
| { | |
| "epoch": 5.54054054054054, | |
| "grad_norm": 0.9178352019380714, | |
| "learning_rate": 7.183535312167755e-07, | |
| "loss": 0.1271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12746351957321167, | |
| "step": 2255, | |
| "valid_targets_mean": 1302.9, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 5.552825552825553, | |
| "grad_norm": 0.8573390318962475, | |
| "learning_rate": 6.808667823606474e-07, | |
| "loss": 0.1198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11149466037750244, | |
| "step": 2260, | |
| "valid_targets_mean": 1217.7, | |
| "valid_targets_min": 633 | |
| }, | |
| { | |
| "epoch": 5.565110565110565, | |
| "grad_norm": 0.9765224591808254, | |
| "learning_rate": 6.443675979077779e-07, | |
| "loss": 0.1203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12065261602401733, | |
| "step": 2265, | |
| "valid_targets_mean": 1159.2, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 5.577395577395578, | |
| "grad_norm": 0.9198036593890597, | |
| "learning_rate": 6.088578436403847e-07, | |
| "loss": 0.1206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1227637380361557, | |
| "step": 2270, | |
| "valid_targets_mean": 1184.3, | |
| "valid_targets_min": 773 | |
| }, | |
| { | |
| "epoch": 5.58968058968059, | |
| "grad_norm": 1.2793621907091794, | |
| "learning_rate": 5.743393347625436e-07, | |
| "loss": 0.1208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12617771327495575, | |
| "step": 2275, | |
| "valid_targets_mean": 1232.4, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 5.601965601965602, | |
| "grad_norm": 0.8920900858528049, | |
| "learning_rate": 5.408138358073833e-07, | |
| "loss": 0.1219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11795288324356079, | |
| "step": 2280, | |
| "valid_targets_mean": 1211.9, | |
| "valid_targets_min": 817 | |
| }, | |
| { | |
| "epoch": 5.614250614250614, | |
| "grad_norm": 0.8571667886521912, | |
| "learning_rate": 5.082830605468969e-07, | |
| "loss": 0.1245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12336680293083191, | |
| "step": 2285, | |
| "valid_targets_mean": 1465.6, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 5.6265356265356266, | |
| "grad_norm": 0.8992558869551788, | |
| "learning_rate": 4.767486719043235e-07, | |
| "loss": 0.1266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12730351090431213, | |
| "step": 2290, | |
| "valid_targets_mean": 1303.9, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 5.638820638820639, | |
| "grad_norm": 0.8591535704564042, | |
| "learning_rate": 4.4621228186915833e-07, | |
| "loss": 0.1207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11848782002925873, | |
| "step": 2295, | |
| "valid_targets_mean": 1375.6, | |
| "valid_targets_min": 663 | |
| }, | |
| { | |
| "epoch": 5.651105651105651, | |
| "grad_norm": 0.8284994670030315, | |
| "learning_rate": 4.166754514147275e-07, | |
| "loss": 0.1201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11597757041454315, | |
| "step": 2300, | |
| "valid_targets_mean": 1349.8, | |
| "valid_targets_min": 694 | |
| }, | |
| { | |
| "epoch": 5.663390663390663, | |
| "grad_norm": 0.8646964113912086, | |
| "learning_rate": 3.881396904184231e-07, | |
| "loss": 0.1247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13614369928836823, | |
| "step": 2305, | |
| "valid_targets_mean": 1455.5, | |
| "valid_targets_min": 857 | |
| }, | |
| { | |
| "epoch": 5.675675675675675, | |
| "grad_norm": 0.8404310937913154, | |
| "learning_rate": 3.6060645758449584e-07, | |
| "loss": 0.1176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10538631677627563, | |
| "step": 2310, | |
| "valid_targets_mean": 1306.6, | |
| "valid_targets_min": 748 | |
| }, | |
| { | |
| "epoch": 5.687960687960688, | |
| "grad_norm": 0.9597923363817331, | |
| "learning_rate": 3.34077160369497e-07, | |
| "loss": 0.1244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13196837902069092, | |
| "step": 2315, | |
| "valid_targets_mean": 1318.8, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 5.7002457002457, | |
| "grad_norm": 0.8652724839299443, | |
| "learning_rate": 3.08553154910336e-07, | |
| "loss": 0.1244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11592702567577362, | |
| "step": 2320, | |
| "valid_targets_mean": 1257.1, | |
| "valid_targets_min": 745 | |
| }, | |
| { | |
| "epoch": 5.712530712530713, | |
| "grad_norm": 0.9423087662082816, | |
| "learning_rate": 2.840357459549492e-07, | |
| "loss": 0.1219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1314229667186737, | |
| "step": 2325, | |
| "valid_targets_mean": 1301.2, | |
| "valid_targets_min": 673 | |
| }, | |
| { | |
| "epoch": 5.724815724815725, | |
| "grad_norm": 0.8385220080184815, | |
| "learning_rate": 2.6052618679560884e-07, | |
| "loss": 0.1241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12265875935554504, | |
| "step": 2330, | |
| "valid_targets_mean": 1400.9, | |
| "valid_targets_min": 586 | |
| }, | |
| { | |
| "epoch": 5.737100737100737, | |
| "grad_norm": 0.8945813157127893, | |
| "learning_rate": 2.380256792048541e-07, | |
| "loss": 0.1183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11247716099023819, | |
| "step": 2335, | |
| "valid_targets_mean": 1192.9, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 5.749385749385749, | |
| "grad_norm": 0.8960103105650793, | |
| "learning_rate": 2.1653537337405383e-07, | |
| "loss": 0.1199, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1242547482252121, | |
| "step": 2340, | |
| "valid_targets_mean": 1306.6, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 5.761670761670762, | |
| "grad_norm": 0.8238805899128148, | |
| "learning_rate": 1.9605636785462234e-07, | |
| "loss": 0.1189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11851818114519119, | |
| "step": 2345, | |
| "valid_targets_mean": 1488.6, | |
| "valid_targets_min": 647 | |
| }, | |
| { | |
| "epoch": 5.773955773955774, | |
| "grad_norm": 0.8855688391079217, | |
| "learning_rate": 1.7658970950185095e-07, | |
| "loss": 0.1195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11863315850496292, | |
| "step": 2350, | |
| "valid_targets_mean": 1377.8, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 5.7862407862407865, | |
| "grad_norm": 0.855607979979025, | |
| "learning_rate": 1.5813639342140197e-07, | |
| "loss": 0.1203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12171070277690887, | |
| "step": 2355, | |
| "valid_targets_mean": 1414.1, | |
| "valid_targets_min": 559 | |
| }, | |
| { | |
| "epoch": 5.798525798525798, | |
| "grad_norm": 0.8316792475848385, | |
| "learning_rate": 1.4069736291843605e-07, | |
| "loss": 0.122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1226361021399498, | |
| "step": 2360, | |
| "valid_targets_mean": 1355.3, | |
| "valid_targets_min": 769 | |
| }, | |
| { | |
| "epoch": 5.8108108108108105, | |
| "grad_norm": 0.9260773825030689, | |
| "learning_rate": 1.242735094493952e-07, | |
| "loss": 0.1277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13377943634986877, | |
| "step": 2365, | |
| "valid_targets_mean": 1301.8, | |
| "valid_targets_min": 709 | |
| }, | |
| { | |
| "epoch": 5.823095823095823, | |
| "grad_norm": 0.855757951656998, | |
| "learning_rate": 1.0886567257643033e-07, | |
| "loss": 0.1266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1385726034641266, | |
| "step": 2370, | |
| "valid_targets_mean": 1503.9, | |
| "valid_targets_min": 754 | |
| }, | |
| { | |
| "epoch": 5.835380835380835, | |
| "grad_norm": 0.9407815841928389, | |
| "learning_rate": 9.447463992448891e-08, | |
| "loss": 0.1228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12503711879253387, | |
| "step": 2375, | |
| "valid_targets_mean": 1253.7, | |
| "valid_targets_min": 728 | |
| }, | |
| { | |
| "epoch": 5.847665847665848, | |
| "grad_norm": 0.8611472329035134, | |
| "learning_rate": 8.110114714104277e-08, | |
| "loss": 0.1206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12182988226413727, | |
| "step": 2380, | |
| "valid_targets_mean": 1357.1, | |
| "valid_targets_min": 798 | |
| }, | |
| { | |
| "epoch": 5.85995085995086, | |
| "grad_norm": 0.8641587037941166, | |
| "learning_rate": 6.874587785849152e-08, | |
| "loss": 0.1166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11340140551328659, | |
| "step": 2385, | |
| "valid_targets_mean": 1219.0, | |
| "valid_targets_min": 563 | |
| }, | |
| { | |
| "epoch": 5.872235872235873, | |
| "grad_norm": 0.7937227445555616, | |
| "learning_rate": 5.7409463659219286e-08, | |
| "loss": 0.1267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12618783116340637, | |
| "step": 2390, | |
| "valid_targets_mean": 1584.6, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 5.884520884520884, | |
| "grad_norm": 0.8529953792401733, | |
| "learning_rate": 4.709248404329625e-08, | |
| "loss": 0.1185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11412382125854492, | |
| "step": 2395, | |
| "valid_targets_mean": 1260.2, | |
| "valid_targets_min": 783 | |
| }, | |
| { | |
| "epoch": 5.896805896805897, | |
| "grad_norm": 0.9011885429861078, | |
| "learning_rate": 3.7795466398868885e-08, | |
| "loss": 0.1207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11974605917930603, | |
| "step": 2400, | |
| "valid_targets_mean": 1240.2, | |
| "valid_targets_min": 633 | |
| }, | |
| { | |
| "epoch": 5.909090909090909, | |
| "grad_norm": 0.8848787748180499, | |
| "learning_rate": 2.9518885975192702e-08, | |
| "loss": 0.1226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12469591945409775, | |
| "step": 2405, | |
| "valid_targets_mean": 1286.1, | |
| "valid_targets_min": 679 | |
| }, | |
| { | |
| "epoch": 5.921375921375922, | |
| "grad_norm": 0.9431735853443147, | |
| "learning_rate": 2.226316585833832e-08, | |
| "loss": 0.1247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13896986842155457, | |
| "step": 2410, | |
| "valid_targets_mean": 1285.1, | |
| "valid_targets_min": 538 | |
| }, | |
| { | |
| "epoch": 5.933660933660933, | |
| "grad_norm": 0.8984084286385163, | |
| "learning_rate": 1.6028676949570997e-08, | |
| "loss": 0.1276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12550409138202667, | |
| "step": 2415, | |
| "valid_targets_mean": 1235.6, | |
| "valid_targets_min": 770 | |
| }, | |
| { | |
| "epoch": 5.945945945945946, | |
| "grad_norm": 0.8546509372519073, | |
| "learning_rate": 1.0815737946383575e-08, | |
| "loss": 0.1194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12137577682733536, | |
| "step": 2420, | |
| "valid_targets_mean": 1372.6, | |
| "valid_targets_min": 965 | |
| }, | |
| { | |
| "epoch": 5.958230958230958, | |
| "grad_norm": 0.8630819495011012, | |
| "learning_rate": 6.624615326207284e-09, | |
| "loss": 0.1187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11519676446914673, | |
| "step": 2425, | |
| "valid_targets_mean": 1213.4, | |
| "valid_targets_min": 575 | |
| }, | |
| { | |
| "epoch": 5.9705159705159705, | |
| "grad_norm": 0.7804352739463468, | |
| "learning_rate": 3.4555233327893124e-09, | |
| "loss": 0.1292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11273680627346039, | |
| "step": 2430, | |
| "valid_targets_mean": 1430.3, | |
| "valid_targets_min": 724 | |
| }, | |
| { | |
| "epoch": 5.982800982800983, | |
| "grad_norm": 0.8044047005361604, | |
| "learning_rate": 1.3086239652415621e-09, | |
| "loss": 0.1196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1092795580625534, | |
| "step": 2435, | |
| "valid_targets_mean": 1334.7, | |
| "valid_targets_min": 751 | |
| }, | |
| { | |
| "epoch": 5.995085995085995, | |
| "grad_norm": 0.9276448583737459, | |
| "learning_rate": 1.840269697628294e-10, | |
| "loss": 0.1211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12196892499923706, | |
| "step": 2440, | |
| "valid_targets_mean": 1338.7, | |
| "valid_targets_min": 687 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12042035162448883, | |
| "step": 2442, | |
| "total_flos": 254043172110336.0, | |
| "train_loss": 0.18868140803068803, | |
| "train_runtime": 8014.1282, | |
| "train_samples_per_second": 4.869, | |
| "train_steps_per_second": 0.305, | |
| "valid_targets_mean": 1261.1, | |
| "valid_targets_min": 857 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2442, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 254043172110336.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |