{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.94964565460649,
  "eval_steps": 500,
  "global_step": 2505,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "grad_norm": 29.208302611647333,
      "learning_rate": 9.960159362549802e-08,
      "loss": 1.1515,
      "sft_loss": 0.1292552625760436,
      "step": 5,
      "total_loss": 0.15341808083976502,
      "value_loss": 0.24162817830219865,
      "value_loss_search": 0.8858459698676597,
      "value_loss_thought": 1.0471794582015719
    },
    {
      "epoch": 0.06,
      "grad_norm": 22.329741865045907,
      "learning_rate": 1.9920318725099604e-07,
      "loss": 1.2242,
      "sft_loss": 0.13713806308805943,
      "step": 10,
      "total_loss": 0.16430261046643863,
      "value_loss": 0.27164546787971633,
      "value_loss_search": 1.0301193907580455,
      "value_loss_thought": 1.1430443533812649
    },
    {
      "epoch": 0.09,
      "grad_norm": 16.69220166984679,
      "learning_rate": 2.9880478087649405e-07,
      "loss": 1.1712,
      "sft_loss": 0.11755451802164316,
      "step": 15,
      "total_loss": 0.1385691657076677,
      "value_loss": 0.21014647737029007,
      "value_loss_search": 0.8529300302307092,
      "value_loss_thought": 0.8282418000162579
    },
    {
      "epoch": 0.12,
      "grad_norm": 15.26729873693678,
      "learning_rate": 3.9840637450199207e-07,
      "loss": 1.047,
      "sft_loss": 0.08705389499664307,
      "step": 20,
      "total_loss": 0.10808856324001681,
      "value_loss": 0.21034667690400966,
      "value_loss_search": 0.9239509059012562,
      "value_loss_thought": 0.7588225028477609
    },
    {
      "epoch": 0.15,
      "grad_norm": 16.491728960581845,
      "learning_rate": 4.9800796812749e-07,
      "loss": 1.1149,
      "sft_loss": 0.11774167586117983,
      "step": 25,
      "total_loss": 0.13845662841631565,
      "value_loss": 0.20714952626731248,
      "value_loss_search": 0.7495332275483975,
      "value_loss_thought": 0.9076629818649963
    },
    {
      "epoch": 0.18,
      "grad_norm": 13.549454686271,
      "learning_rate": 5.976095617529881e-07,
      "loss": 0.883,
      "sft_loss": 0.083274289034307,
      "step": 30,
      "total_loss": 0.10342498328827787,
      "value_loss": 0.2015069484245032,
      "value_loss_search": 0.6761670514111756,
      "value_loss_thought": 0.9358885432491661
    },
    {
      "epoch": 0.21,
      "grad_norm": 13.443693495652768,
      "learning_rate": 6.972111553784861e-07,
      "loss": 0.805,
      "sft_loss": 0.08413450215011835,
      "step": 35,
      "total_loss": 0.1025179280739394,
      "value_loss": 0.18383425649954005,
      "value_loss_search": 0.5737597634542908,
      "value_loss_thought": 0.8969142883783207
    },
    {
      "epoch": 0.24,
      "grad_norm": 9.393174013923895,
      "learning_rate": 7.968127490039841e-07,
      "loss": 0.7925,
      "sft_loss": 0.07780555076897144,
      "step": 40,
      "total_loss": 0.0958856519588153,
      "value_loss": 0.18080101079540328,
      "value_loss_search": 0.6263274765132223,
      "value_loss_thought": 0.8200806069507962
    },
    {
      "epoch": 0.27,
      "grad_norm": 14.557899943787424,
      "learning_rate": 8.964143426294822e-07,
      "loss": 0.6601,
      "sft_loss": 0.06730504501610994,
      "step": 45,
      "total_loss": 0.08439774930666318,
      "value_loss": 0.17092704317183233,
      "value_loss_search": 0.6156709093504105,
      "value_loss_thought": 0.7517454390181229
    },
    {
      "epoch": 0.3,
      "grad_norm": 5.237593181257475,
      "learning_rate": 9.9601593625498e-07,
      "loss": 0.5741,
      "sft_loss": 0.05471886033192277,
      "step": 50,
      "total_loss": 0.06898687301945756,
      "value_loss": 0.14268012425163762,
      "value_loss_search": 0.5047549274406264,
      "value_loss_thought": 0.6366860627662391
    },
    {
      "epoch": 0.33,
      "grad_norm": 5.49922083273137,
      "learning_rate": 1.0956175298804781e-06,
      "loss": 0.5123,
      "sft_loss": 0.05361118288710713,
      "step": 55,
      "total_loss": 0.0653495688289695,
      "value_loss": 0.1173838603310287,
      "value_loss_search": 0.3898644742756964,
      "value_loss_thought": 0.5492064105579629
    },
    {
      "epoch": 0.36,
      "grad_norm": 4.288203361946918,
      "learning_rate": 1.1952191235059762e-06,
      "loss": 0.4635,
      "sft_loss": 0.03561480939388275,
      "step": 60,
      "total_loss": 0.04738654125249013,
      "value_loss": 0.11771731851040386,
      "value_loss_search": 0.3933807262365008,
      "value_loss_thought": 0.5483578289393336
    },
    {
      "epoch": 0.39,
      "grad_norm": 4.058680831426542,
      "learning_rate": 1.294820717131474e-06,
      "loss": 0.405,
      "sft_loss": 0.03836059970781207,
      "step": 65,
      "total_loss": 0.049667860931367616,
      "value_loss": 0.1130726064555347,
      "value_loss_search": 0.39820473948711876,
      "value_loss_thought": 0.5063761109966436
    },
    {
      "epoch": 0.42,
      "grad_norm": 6.869902478996511,
      "learning_rate": 1.3944223107569721e-06,
      "loss": 0.3733,
      "sft_loss": 0.03362296093255281,
      "step": 70,
      "total_loss": 0.04714309505507117,
      "value_loss": 0.13520133687416092,
      "value_loss_search": 0.4016104494468891,
      "value_loss_thought": 0.6800002471776679
    },
    {
      "epoch": 0.45,
      "grad_norm": 6.311680041987265,
      "learning_rate": 1.4940239043824702e-06,
      "loss": 0.3481,
      "sft_loss": 0.028891393821686505,
      "step": 75,
      "total_loss": 0.04102085728627571,
      "value_loss": 0.12129463239980395,
      "value_loss_search": 0.4050044394622091,
      "value_loss_thought": 0.5653526145266369
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.336922175624455,
      "learning_rate": 1.5936254980079683e-06,
      "loss": 0.3233,
      "sft_loss": 0.022906177304685117,
      "step": 80,
      "total_loss": 0.034932951488735854,
      "value_loss": 0.12026773888501338,
      "value_loss_search": 0.3894530791064881,
      "value_loss_thought": 0.5726888305682223
    },
    {
      "epoch": 0.51,
      "grad_norm": 5.45038020134611,
      "learning_rate": 1.6932270916334661e-06,
      "loss": 0.3587,
      "sft_loss": 0.028216248843818903,
      "step": 85,
      "total_loss": 0.04048567872773674,
      "value_loss": 0.12269429753323494,
      "value_loss_search": 0.40813677250607727,
      "value_loss_thought": 0.5734176081576152
    },
    {
      "epoch": 0.54,
      "grad_norm": 4.373139438548536,
      "learning_rate": 1.7928286852589644e-06,
      "loss": 0.322,
      "sft_loss": 0.030063434736803174,
      "step": 90,
      "total_loss": 0.0427513758506393,
      "value_loss": 0.1268794086528942,
      "value_loss_search": 0.45016070968977145,
      "value_loss_thought": 0.5648745556129142
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.3872903271458408,
      "learning_rate": 1.8924302788844623e-06,
      "loss": 0.3401,
      "sft_loss": 0.03258526036515832,
      "step": 95,
      "total_loss": 0.04433769078787009,
      "value_loss": 0.11752430766646285,
      "value_loss_search": 0.3465800850848808,
      "value_loss_thought": 0.5936143765953602
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.899664564340515,
      "learning_rate": 1.99203187250996e-06,
      "loss": 0.3067,
      "sft_loss": 0.023941753478720785,
      "step": 100,
      "total_loss": 0.03416529792812071,
      "value_loss": 0.10223544192849658,
      "value_loss_search": 0.3503992037207354,
      "value_loss_thought": 0.46748433216125707
    },
    {
      "epoch": 0.63,
      "grad_norm": 5.3644737487705125,
      "learning_rate": 2.0916334661354584e-06,
      "loss": 0.3253,
      "sft_loss": 0.03116959072649479,
      "step": 105,
      "total_loss": 0.04235388926063024,
      "value_loss": 0.1118429817724973,
      "value_loss_search": 0.3848635824284429,
      "value_loss_thought": 0.509880276385229
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.807659467395103,
      "learning_rate": 2.1912350597609563e-06,
      "loss": 0.3242,
      "sft_loss": 0.030541227804496883,
      "step": 110,
      "total_loss": 0.03971213593467837,
      "value_loss": 0.09170907911611721,
      "value_loss_search": 0.2967309238272719,
      "value_loss_thought": 0.4369417035020888
    },
    {
      "epoch": 0.69,
      "grad_norm": 5.565383438847989,
      "learning_rate": 2.290836653386454e-06,
      "loss": 0.2985,
      "sft_loss": 0.03250002646818757,
      "step": 115,
      "total_loss": 0.04239036420258344,
      "value_loss": 0.09890337522811024,
      "value_loss_search": 0.3520674262268585,
      "value_loss_thought": 0.43915957515127957
    },
    {
      "epoch": 0.72,
      "grad_norm": 6.59850948978845,
      "learning_rate": 2.3904382470119524e-06,
      "loss": 0.3364,
      "sft_loss": 0.025409412384033204,
      "step": 120,
      "total_loss": 0.03802986370919825,
      "value_loss": 0.12620451210823375,
      "value_loss_search": 0.3680036140809534,
      "value_loss_thought": 0.6416324874851853
    },
    {
      "epoch": 0.75,
      "grad_norm": 5.179904397966752,
      "learning_rate": 2.4900398406374503e-06,
      "loss": 0.3036,
      "sft_loss": 0.029751096572726964,
      "step": 125,
      "total_loss": 0.04014970105636166,
      "value_loss": 0.10398604299989529,
      "value_loss_search": 0.3222789403582283,
      "value_loss_thought": 0.5096093997417483
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.7818925064532296,
      "learning_rate": 2.589641434262948e-06,
      "loss": 0.3135,
      "sft_loss": 0.027432794403284787,
      "step": 130,
      "total_loss": 0.039190283310017546,
      "value_loss": 0.1175748852387187,
      "value_loss_search": 0.379848483775595,
      "value_loss_thought": 0.5607506038024439
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.1180919559174898,
      "learning_rate": 2.6892430278884464e-06,
      "loss": 0.2712,
      "sft_loss": 0.02562392014078796,
      "step": 135,
      "total_loss": 0.037023369785310935,
      "value_loss": 0.11399449376476696,
      "value_loss_search": 0.3599409429902153,
      "value_loss_thought": 0.552015009484603
    },
    {
      "epoch": 0.84,
      "grad_norm": 4.040800077838943,
      "learning_rate": 2.7888446215139443e-06,
      "loss": 0.3085,
      "sft_loss": 0.030298156943172217,
      "step": 140,
      "total_loss": 0.040708614706454684,
      "value_loss": 0.10410457447142107,
      "value_loss_search": 0.3299579021160753,
      "value_loss_thought": 0.5028786922775907
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.7542993625502863,
      "learning_rate": 2.8884462151394426e-06,
      "loss": 0.2901,
      "sft_loss": 0.033623593579977754,
      "step": 145,
      "total_loss": 0.04418132658774994,
      "value_loss": 0.10557732760353247,
      "value_loss_search": 0.3160576841353759,
      "value_loss_thought": 0.528560933744302
    },
    {
      "epoch": 0.9,
      "grad_norm": 4.460937657390284,
      "learning_rate": 2.9880478087649404e-06,
      "loss": 0.2916,
      "sft_loss": 0.027346841990947723,
      "step": 150,
      "total_loss": 0.03589563165332947,
      "value_loss": 0.08548789632186526,
      "value_loss_search": 0.2342234575400653,
      "value_loss_thought": 0.4496797103092831
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4354922648897808,
      "learning_rate": 3.0876494023904387e-06,
      "loss": 0.2869,
      "sft_loss": 0.0277699186000973,
      "step": 155,
      "total_loss": 0.03829326016602863,
      "value_loss": 0.10523341278894804,
      "value_loss_search": 0.30548287588171663,
      "value_loss_thought": 0.536384426720906
    },
    {
      "epoch": 0.95,
      "grad_norm": 7.993819548921568,
      "learning_rate": 3.1872509960159366e-06,
      "loss": 0.2799,
      "sft_loss": 0.028502677148208023,
      "step": 160,
      "total_loss": 0.03686514748242189,
      "value_loss": 0.08362470217980444,
      "value_loss_search": 0.242223716438275,
      "value_loss_thought": 0.4267738984548487
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.8893692041440224,
      "learning_rate": 3.2868525896414344e-06,
      "loss": 0.271,
      "sft_loss": 0.03119087303057313,
      "step": 165,
      "total_loss": 0.04075200489824056,
      "value_loss": 0.09561131574337196,
      "value_loss_search": 0.26875160563213285,
      "value_loss_thought": 0.4961389156705991
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.77858117117453,
      "learning_rate": 3.3864541832669323e-06,
      "loss": 0.2499,
      "sft_loss": 0.019535421626642346,
      "step": 170,
      "total_loss": 0.025970515375956894,
      "value_loss": 0.06435093678010162,
      "value_loss_search": 0.20992231852004578,
      "value_loss_thought": 0.30488517887424677
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.951277466183642,
      "learning_rate": 3.486055776892431e-06,
      "loss": 0.2693,
      "sft_loss": 0.023267556354403494,
      "step": 175,
      "total_loss": 0.030257757987055812,
      "value_loss": 0.0699020166444825,
      "value_loss_search": 0.21118213117242704,
      "value_loss_thought": 0.3480340027017519
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.9207350101372365,
      "learning_rate": 3.585657370517929e-06,
      "loss": 0.262,
      "sft_loss": 0.025930221634916963,
      "step": 180,
      "total_loss": 0.03511905904761079,
      "value_loss": 0.09188836992834695,
      "value_loss_search": 0.26077198034647325,
      "value_loss_thought": 0.4743349766329629
    },
    {
      "epoch": 1.1,
      "grad_norm": 5.609912691724805,
      "learning_rate": 3.6852589641434267e-06,
      "loss": 0.2446,
      "sft_loss": 0.024912988301366568,
      "step": 185,
      "total_loss": 0.03355662206740817,
      "value_loss": 0.08643633674801095,
      "value_loss_search": 0.24081130932386258,
      "value_loss_thought": 0.4506793859531172
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.5622387501649775,
      "learning_rate": 3.7848605577689246e-06,
      "loss": 0.2466,
      "sft_loss": 0.028595651406794785,
      "step": 190,
      "total_loss": 0.0361521876320694,
      "value_loss": 0.07556536267784394,
      "value_loss_search": 0.22485005248195195,
      "value_loss_thought": 0.37967284960468534
    },
    {
      "epoch": 1.16,
      "grad_norm": 5.1634897659031616,
      "learning_rate": 3.884462151394423e-06,
      "loss": 0.23,
      "sft_loss": 0.02528013151604682,
      "step": 195,
      "total_loss": 0.03363830259149836,
      "value_loss": 0.08358170948340557,
      "value_loss_search": 0.25611529394162746,
      "value_loss_thought": 0.4125383788938052
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0650765438648104,
      "learning_rate": 3.98406374501992e-06,
      "loss": 0.245,
      "sft_loss": 0.016848266730085014,
      "step": 200,
      "total_loss": 0.026103121823689436,
      "value_loss": 0.09254854982718826,
      "value_loss_search": 0.3132247169883044,
      "value_loss_thought": 0.4271636780547851
    },
    {
      "epoch": 1.22,
      "grad_norm": 5.039293863721038,
      "learning_rate": 4.083665338645419e-06,
      "loss": 0.2528,
      "sft_loss": 0.02782264384441078,
      "step": 205,
      "total_loss": 0.03652619049535133,
      "value_loss": 0.08703546431788708,
      "value_loss_search": 0.24252449526474038,
      "value_loss_thought": 0.453759221357177
    },
    {
      "epoch": 1.25,
      "grad_norm": 4.004833085846898,
      "learning_rate": 4.183266932270917e-06,
      "loss": 0.2485,
      "sft_loss": 0.03399158006068319,
      "step": 210,
      "total_loss": 0.04147085763215728,
      "value_loss": 0.07479277592756262,
      "value_loss_search": 0.24146745675992634,
      "value_loss_thought": 0.3568747464043554
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2900194027614567,
      "learning_rate": 4.282868525896415e-06,
      "loss": 0.2386,
      "sft_loss": 0.030579356662929057,
      "step": 215,
      "total_loss": 0.0377965772360767,
      "value_loss": 0.07217220571910729,
      "value_loss_search": 0.23593004742797347,
      "value_loss_thought": 0.34144759898772464
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2530806573737077,
      "learning_rate": 4.382470119521913e-06,
      "loss": 0.2305,
      "sft_loss": 0.02292049501556903,
      "step": 220,
      "total_loss": 0.028867545309185515,
      "value_loss": 0.05947050442919135,
      "value_loss_search": 0.17588968106138053,
      "value_loss_thought": 0.2998743573494721
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.3848100156605976,
      "learning_rate": 4.482071713147411e-06,
      "loss": 0.2488,
      "sft_loss": 0.021163933374918998,
      "step": 225,
      "total_loss": 0.027522947750139793,
      "value_loss": 0.06359014347035555,
      "value_loss_search": 0.17336269621831663,
      "value_loss_thought": 0.33535845117003193
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.155244067655952,
      "learning_rate": 4.581673306772908e-06,
      "loss": 0.2318,
      "sft_loss": 0.021529758046381176,
      "step": 230,
      "total_loss": 0.02896902564170887,
      "value_loss": 0.07439267357622156,
      "value_loss_search": 0.21162400761529626,
      "value_loss_thought": 0.3835173849183775
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.8792764757968086,
      "learning_rate": 4.681274900398407e-06,
      "loss": 0.2156,
      "sft_loss": 0.023555860621854663,
      "step": 235,
      "total_loss": 0.03241225123529148,
      "value_loss": 0.08856390380533412,
      "value_loss_search": 0.24428551671026072,
      "value_loss_thought": 0.4642257096977119
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.4958134022432183,
      "learning_rate": 4.780876494023905e-06,
      "loss": 0.238,
      "sft_loss": 0.024119808338582516,
      "step": 240,
      "total_loss": 0.02983194561311393,
      "value_loss": 0.05712137209266075,
      "value_loss_search": 0.17349829038121242,
      "value_loss_thought": 0.28347268382058244
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3555543700333303,
      "learning_rate": 4.880478087649403e-06,
      "loss": 0.2381,
      "sft_loss": 0.02051589481998235,
      "step": 245,
      "total_loss": 0.02669115279750258,
      "value_loss": 0.0617525799851137,
      "value_loss_search": 0.1842656165907101,
      "value_loss_thought": 0.3097550194659561
    },
    {
      "epoch": 1.49,
      "grad_norm": 7.0740358168657576,
      "learning_rate": 4.980079681274901e-06,
      "loss": 0.2514,
      "sft_loss": 0.02382271084934473,
      "step": 250,
      "total_loss": 0.031842488735128426,
      "value_loss": 0.08019777825520577,
      "value_loss_search": 0.2615329277869023,
      "value_loss_thought": 0.38004929782619
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.299103577467398,
      "learning_rate": 4.9999611473368576e-06,
      "loss": 0.2199,
      "sft_loss": 0.02000407627783716,
      "step": 255,
      "total_loss": 0.025701836839834867,
      "value_loss": 0.05697760357270454,
      "value_loss_search": 0.14788065557549998,
      "value_loss_thought": 0.3079401723291085
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6807413310213553,
      "learning_rate": 4.999803310462543e-06,
      "loss": 0.2232,
      "sft_loss": 0.025435299216769635,
      "step": 260,
      "total_loss": 0.033880134870196345,
      "value_loss": 0.08444835591653828,
      "value_loss_search": 0.2635558438050339,
      "value_loss_thought": 0.4120309994032141
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.984061493281701,
      "learning_rate": 4.999524068745182e-06,
      "loss": 0.2248,
      "sft_loss": 0.022567298170179127,
      "step": 265,
      "total_loss": 0.02977634134258551,
      "value_loss": 0.07209043111970459,
      "value_loss_search": 0.22659471631785663,
      "value_loss_thought": 0.35012873328050775
    },
    {
      "epoch": 1.61,
      "grad_norm": 5.581660893537601,
      "learning_rate": 4.99912343574636e-06,
      "loss": 0.2457,
      "sft_loss": 0.02686069840565324,
      "step": 270,
      "total_loss": 0.033531226357308694,
      "value_loss": 0.06670527966834924,
      "value_loss_search": 0.21075203863763364,
      "value_loss_thought": 0.3228901971851883
    },
    {
      "epoch": 1.64,
      "grad_norm": 4.189848962908778,
      "learning_rate": 4.998601430923122e-06,
      "loss": 0.2437,
      "sft_loss": 0.025497494312003256,
      "step": 275,
      "total_loss": 0.031059039360116003,
      "value_loss": 0.0556154465644795,
      "value_loss_search": 0.16651217907464116,
      "value_loss_thought": 0.27841138996300285
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.2840019127815288,
      "learning_rate": 4.997958079627029e-06,
      "loss": 0.2337,
      "sft_loss": 0.02229512729682028,
      "step": 280,
      "total_loss": 0.02872599834945504,
      "value_loss": 0.06430871063357699,
      "value_loss_search": 0.15088917155694617,
      "value_loss_thought": 0.3635805150745
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9906954863509052,
      "learning_rate": 4.997193413102923e-06,
      "loss": 0.2358,
      "sft_loss": 0.030755233392119406,
      "step": 285,
      "total_loss": 0.03727109958572328,
      "value_loss": 0.06515866482732235,
      "value_loss_search": 0.19967410092722276,
      "value_loss_thought": 0.3215952147511416
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0542643472869706,
      "learning_rate": 4.996307468487414e-06,
      "loss": 0.222,
      "sft_loss": 0.02293794075958431,
      "step": 290,
      "total_loss": 0.027633006192809263,
      "value_loss": 0.04695065361320303,
      "value_loss_search": 0.1386001783932443,
      "value_loss_thought": 0.23700505015585804
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1566271207415375,
      "learning_rate": 4.995300288807075e-06,
      "loss": 0.2254,
      "sft_loss": 0.024227931816130876,
      "step": 295,
      "total_loss": 0.0282421637364223,
      "value_loss": 0.04014231975597795,
      "value_loss_search": 0.13643495284136406,
      "value_loss_thought": 0.18470360402425284
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.780628989439151,
      "learning_rate": 4.994171922976349e-06,
      "loss": 0.226,
      "sft_loss": 0.019921383424662053,
      "step": 300,
      "total_loss": 0.02442953032641526,
      "value_loss": 0.045081469729302624,
      "value_loss_search": 0.1360888234203003,
      "value_loss_thought": 0.22456293730974722
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.350607243352896,
      "learning_rate": 4.992922425795179e-06,
      "loss": 0.2152,
      "sft_loss": 0.021342089958488943,
      "step": 305,
      "total_loss": 0.02731037002786252,
      "value_loss": 0.059682800685004624,
      "value_loss_search": 0.14473845993270515,
      "value_loss_thought": 0.33272394547384465
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.476250174714291,
      "learning_rate": 4.991551857946343e-06,
      "loss": 0.2167,
      "sft_loss": 0.024368287762627006,
      "step": 310,
      "total_loss": 0.02891062788005456,
      "value_loss": 0.04542339965519204,
      "value_loss_search": 0.13215827879938616,
      "value_loss_thought": 0.23122892067385692
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1211906530083304,
      "learning_rate": 4.990060285992507e-06,
      "loss": 0.2267,
      "sft_loss": 0.03224845631048083,
      "step": 315,
      "total_loss": 0.0367003864619619,
      "value_loss": 0.044519302164553666,
      "value_loss_search": 0.13228539347510376,
      "value_loss_thought": 0.22386902507196282
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.051009811709579,
      "learning_rate": 4.988447782372996e-06,
      "loss": 0.2227,
      "sft_loss": 0.015061728050932288,
      "step": 320,
      "total_loss": 0.020883486873708536,
      "value_loss": 0.0582175869083585,
      "value_loss_search": 0.18025588636555767,
      "value_loss_thought": 0.28548481071411513
    },
    {
      "epoch": 1.94,
      "grad_norm": 6.185126993035625,
      "learning_rate": 4.986714425400269e-06,
      "loss": 0.2242,
      "sft_loss": 0.02483751201070845,
      "step": 325,
      "total_loss": 0.029222130161724636,
      "value_loss": 0.043846180192485915,
      "value_loss_search": 0.126026460820583,
      "value_loss_thought": 0.22474298011511565
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.2748032727970502,
      "learning_rate": 4.98486029925612e-06,
      "loss": 0.2113,
      "sft_loss": 0.022788235195912422,
      "step": 330,
      "total_loss": 0.027279667726634215,
      "value_loss": 0.04491432422255457,
      "value_loss_search": 0.14298201398346463,
      "value_loss_thought": 0.21633257850262452
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.552484041484157,
      "learning_rate": 4.982885493987595e-06,
      "loss": 0.2124,
      "sft_loss": 0.02811491028405726,
      "step": 335,
      "total_loss": 0.032856917950812206,
      "value_loss": 0.04742007501154148,
      "value_loss_search": 0.15364139593775689,
      "value_loss_thought": 0.22571920259069883
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7785428382948316,
      "learning_rate": 4.9807901055026054e-06,
      "loss": 0.1788,
      "sft_loss": 0.018437814386561512,
      "step": 340,
      "total_loss": 0.02146597366839842,
      "value_loss": 0.03028159231407699,
      "value_loss_search": 0.10903610251207282,
      "value_loss_thought": 0.13321663648093818
    },
    {
      "epoch": 2.06,
      "grad_norm": 1.2850611180768896,
      "learning_rate": 4.978574235565284e-06,
      "loss": 0.1788,
      "sft_loss": 0.023076185397803782,
      "step": 345,
      "total_loss": 0.027342022850689318,
      "value_loss": 0.04265837599887164,
      "value_loss_search": 0.13071881325704454,
      "value_loss_thought": 0.21054819686614792
    },
    {
      "epoch": 2.09,
      "grad_norm": 1.3911394308456917,
      "learning_rate": 4.976237991791033e-06,
      "loss": 0.1719,
      "sft_loss": 0.019436489534564318,
      "step": 350,
      "total_loss": 0.023576461341781395,
      "value_loss": 0.041399715623504106,
      "value_loss_search": 0.1492293958552068,
      "value_loss_thought": 0.18196833048205008
    },
    {
      "epoch": 2.12,
      "grad_norm": 1.6481312891558262,
      "learning_rate": 4.973781487641303e-06,
      "loss": 0.1844,
      "sft_loss": 0.025559269287623466,
      "step": 355,
      "total_loss": 0.02877166439211578,
      "value_loss": 0.032123952108486266,
      "value_loss_search": 0.11874442657458531,
      "value_loss_thought": 0.13824718931678034
    },
    {
      "epoch": 2.15,
      "grad_norm": 1.103781032562002,
      "learning_rate": 4.9712048424180806e-06,
      "loss": 0.1862,
      "sft_loss": 0.02381216634530574,
      "step": 360,
      "total_loss": 0.02632210613701318,
      "value_loss": 0.025099397322048845,
      "value_loss_search": 0.08361520563119029,
      "value_loss_thought": 0.11717997152591124
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.0386255202501173,
      "learning_rate": 4.968508181258093e-06,
      "loss": 0.1637,
      "sft_loss": 0.01670523874927312,
      "step": 365,
      "total_loss": 0.019544158500229968,
      "value_loss": 0.028389195861564076,
      "value_loss_search": 0.08506083722818403,
      "value_loss_thought": 0.14205273159604986
    },
    {
      "epoch": 2.21,
      "grad_norm": 1.7600289408433292,
      "learning_rate": 4.965691635126737e-06,
      "loss": 0.1816,
      "sft_loss": 0.0221649584826082,
      "step": 370,
      "total_loss": 0.02530371010229828,
      "value_loss": 0.03138751635051449,
      "value_loss_search": 0.099762270176916,
      "value_loss_thought": 0.1513378613846726
    },
    {
      "epoch": 2.24,
      "grad_norm": 1.2580654310157564,
      "learning_rate": 4.962755340811709e-06,
      "loss": 0.179,
      "sft_loss": 0.01950194430537522,
      "step": 375,
      "total_loss": 0.022580356493926958,
      "value_loss": 0.03078412101158392,
      "value_loss_search": 0.09001722024981973,
      "value_loss_thought": 0.15625574861769564
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.234333438325428,
      "learning_rate": 4.959699440916369e-06,
      "loss": 0.1749,
      "sft_loss": 0.022818994731642304,
      "step": 380,
      "total_loss": 0.025907937735428276,
      "value_loss": 0.030889428492628213,
      "value_loss_search": 0.08153837720744832,
      "value_loss_thought": 0.16557704737147105
    },
    {
      "epoch": 2.3,
      "grad_norm": 1.085143168299747,
      "learning_rate": 4.956524083852812e-06,
      "loss": 0.1667,
      "sft_loss": 0.01802813063841313,
      "step": 385,
      "total_loss": 0.020953079688115394,
      "value_loss": 0.029249489003632334,
      "value_loss_search": 0.09198170516392565,
      "value_loss_thought": 0.14201420772205892
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.242763660758448,
      "learning_rate": 4.953229423834662e-06,
      "loss": 0.1818,
      "sft_loss": 0.011760137742385268,
      "step": 390,
      "total_loss": 0.015489974882802926,
      "value_loss": 0.03729837100072473,
      "value_loss_search": 0.08624583505323927,
      "value_loss_thought": 0.21214113264650222
    },
    {
      "epoch": 2.36,
      "grad_norm": 0.8938036920537223,
      "learning_rate": 4.949815620869579e-06,
      "loss": 0.1824,
      "sft_loss": 0.02469517719000578,
      "step": 395,
      "total_loss": 0.0275583725127035,
      "value_loss": 0.02863195287209237,
      "value_loss_search": 0.08686879273809608,
      "value_loss_thought": 0.1421868310884747
    },
    {
      "epoch": 2.39,
      "grad_norm": 0.7973027480525212,
      "learning_rate": 4.946282840751494e-06,
      "loss": 0.1769,
      "sft_loss": 0.018014212045818566,
      "step": 400,
      "total_loss": 0.021850849609819534,
      "value_loss": 0.03836637459389749,
      "value_loss_search": 0.07837754817135192,
      "value_loss_thought": 0.22855344959643845
    },
    {
      "epoch": 2.42,
      "grad_norm": 1.2529435303540792,
      "learning_rate": 4.942631255052551e-06,
      "loss": 0.1728,
      "sft_loss": 0.015492680622264743,
      "step": 405,
      "total_loss": 0.018379339482680734,
      "value_loss": 0.028866587373761375,
      "value_loss_search": 0.09674510946805412,
      "value_loss_thought": 0.1341875916292338
    },
    {
      "epoch": 2.45,
      "grad_norm": 0.7134673687996919,
      "learning_rate": 4.938861041114779e-06,
      "loss": 0.1664,
      "sft_loss": 0.01866905202623457,
      "step": 410,
      "total_loss": 0.020626170139428267,
      "value_loss": 0.019571180698403624,
      "value_loss_search": 0.06965761288097383,
      "value_loss_thought": 0.08691183333139633
    },
    {
      "epoch": 2.48,
      "grad_norm": 0.7529741551559901,
      "learning_rate": 4.934972382041475e-06,
      "loss": 0.1849,
      "sft_loss": 0.019827575120143593,
      "step": 415,
      "total_loss": 0.022788787103309005,
      "value_loss": 0.029612120029923972,
      "value_loss_search": 0.0888989626518196,
      "value_loss_thought": 0.1479980006653932
    },
    {
      "epoch": 2.51,
      "grad_norm": 1.7995496682865593,
      "learning_rate": 4.9309654666883165e-06,
      "loss": 0.179,
      "sft_loss": 0.020990492962300777,
      "step": 420,
      "total_loss": 0.024445655822250955,
      "value_loss": 0.03455162828067841,
      "value_loss_search": 0.08684888708735343,
      "value_loss_thought": 0.18956413638661615
    },
    {
      "epoch": 2.54,
      "grad_norm": 1.670690442471347,
      "learning_rate": 4.926840489654184e-06,
      "loss": 0.1894,
      "sft_loss": 0.02077859474811703,
      "step": 425,
      "total_loss": 0.024163767833670136,
      "value_loss": 0.033851731998584,
      "value_loss_search": 0.08538484702294227,
      "value_loss_thought": 0.18542900827596895
    },
    {
      "epoch": 2.57,
      "grad_norm": 1.8704656616088737,
      "learning_rate": 4.922597651271716e-06,
      "loss": 0.1927,
      "sft_loss": 0.02478037038818002,
      "step": 430,
      "total_loss": 0.02731952823942265,
      "value_loss": 0.025391577205482463,
      "value_loss_search": 0.08109033311634448,
      "value_loss_thought": 0.12204228106857044
    },
    {
      "epoch": 2.6,
      "grad_norm": 1.3917455631379416,
      "learning_rate": 4.918237157597574e-06,
      "loss": 0.1792,
      "sft_loss": 0.01933148135431111,
      "step": 435,
      "total_loss": 0.022168212975520872,
      "value_loss": 0.02836731561783381,
      "value_loss_search": 0.0816025677968355,
      "value_loss_thought": 0.1453359603422541
    },
    {
      "epoch": 2.63,
      "grad_norm": 1.0019606814299877,
      "learning_rate": 4.913759220402441e-06,
      "loss": 0.1911,
      "sft_loss": 0.019400009652599692,
      "step": 440,
      "total_loss": 0.02239533064052921,
      "value_loss": 0.029953211315478255,
      "value_loss_search": 0.10217987555329273,
      "value_loss_thought": 0.13744581426071817
    },
    {
      "epoch": 2.66,
      "grad_norm": 1.2317965960561872,
      "learning_rate": 4.9091640571607295e-06,
      "loss": 0.1827,
      "sft_loss": 0.016407015430741013,
      "step": 445,
      "total_loss": 0.018946515550851473,
      "value_loss": 0.0253950018544856,
      "value_loss_search": 0.07369275801845561,
      "value_loss_thought": 0.12946725602414516
    },
    {
      "epoch": 2.69,
      "grad_norm": 0.683140754700333,
      "learning_rate": 4.9044518910400285e-06,
      "loss": 0.1676,
      "sft_loss": 0.012768425536341966,
      "step": 450,
      "total_loss": 0.014526197344821412,
      "value_loss": 0.017577718090615237,
      "value_loss_search": 0.04510120849572559,
      "value_loss_thought": 0.09552053666211577
    },
    {
      "epoch": 2.72,
      "grad_norm": 1.0907748085551225,
      "learning_rate": 4.899622950890258e-06,
      "loss": 0.1837,
      "sft_loss": 0.017968191439285873,
      "step": 455,
      "total_loss": 0.02001593358502305,
      "value_loss": 0.020477420029783387,
      "value_loss_search": 0.04958656680909144,
      "value_loss_thought": 0.11423279177743098
    },
    {
      "epoch": 2.75,
      "grad_norm": 1.3850337152675154,
      "learning_rate": 4.894677471232556e-06,
      "loss": 0.1908,
      "sft_loss": 0.01755859658587724,
      "step": 460,
      "total_loss": 0.02234408485255699,
      "value_loss": 0.04785487991980517,
      "value_loss_search": 0.1180493140761314,
      "value_loss_thought": 0.26478972275981505
    },
    {
      "epoch": 2.78,
      "grad_norm": 1.1181978133661836,
      "learning_rate": 4.889615692247893e-06,
      "loss": 0.1841,
      "sft_loss": 0.021812843438237906,
      "step": 465,
      "total_loss": 0.025394389864322874,
      "value_loss": 0.035815464633651574,
      "value_loss_search": 0.09992547294255019,
      "value_loss_thought": 0.18659824333662983
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.0759753145829025,
      "learning_rate": 4.884437859765403e-06,
      "loss": 0.1824,
      "sft_loss": 0.026800552336499094,
      "step": 470,
      "total_loss": 0.030636231325843255,
      "value_loss": 0.03835678935120086,
      "value_loss_search": 0.08862242994021016,
      "value_loss_thought": 0.2182318838626088
    },
    {
      "epoch": 2.83,
      "grad_norm": 1.4431529345376723,
      "learning_rate": 4.879144225250445e-06,
      "loss": 0.1816,
      "sft_loss": 0.017374001140706242,
      "step": 475,
      "total_loss": 0.01985499007805629,
      "value_loss": 0.02480988986644661,
      "value_loss_search": 0.06347666834494703,
      "value_loss_thought": 0.1350024510633375
    },
    {
      "epoch": 2.86,
      "grad_norm": 1.4247925282572933,
      "learning_rate": 4.873735045792395e-06,
      "loss": 0.1818,
      "sft_loss": 0.01573096849024296,
      "step": 480,
      "total_loss": 0.018171964960481547,
      "value_loss": 0.02440996536379316,
      "value_loss_search": 0.06245305103024066,
      "value_loss_thought": 0.13282667167441103
    },
    {
      "epoch": 2.89,
      "grad_norm": 0.9971468267328366,
      "learning_rate": 4.868210584092151e-06,
      "loss": 0.1467,
      "sft_loss": 0.014233330194838346,
      "step": 485,
      "total_loss": 0.017078536024837377,
      "value_loss": 0.028452056018250006,
      "value_loss_search": 0.08309140489452602,
      "value_loss_thought": 0.14452504066284746
    },
    {
      "epoch": 2.92,
      "grad_norm": 1.023944906300116,
      "learning_rate": 4.862571108449387e-06,
      "loss": 0.1698,
      "sft_loss": 0.015431807213462889,
      "step": 490,
      "total_loss": 0.01847703835471748,
      "value_loss": 0.030452309869269813,
      "value_loss_search": 0.068971808032029,
      "value_loss_thought": 0.1746466698442873
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.461096472197934,
      "learning_rate": 4.856816892749512e-06,
      "loss": 0.1971,
      "sft_loss": 0.023769452376291154,
      "step": 495,
      "total_loss": 0.02749020796882178,
      "value_loss": 0.03720755761728469,
      "value_loss_search": 0.09986964019751668,
      "value_loss_thought": 0.19779082740396917
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0866796216704744,
      "learning_rate": 4.850948216450374e-06,
      "loss": 0.1685,
      "sft_loss": 0.014484391221776605,
      "step": 500,
      "total_loss": 0.017671175975374355,
      "value_loss": 0.03186784757053829,
      "value_loss_search": 0.08305875552287034,
      "value_loss_thought": 0.17188402622778085
    },
    {
      "epoch": 3.01,
      "grad_norm": 1.5870923337393499,
      "learning_rate": 4.844965364568688e-06,
      "loss": 0.1597,
      "sft_loss": 0.012694860575720668,
      "step": 505,
      "total_loss": 0.014846146440197572,
      "value_loss": 0.021512858869391492,
      "value_loss_search": 0.0868586509443503,
      "value_loss_thought": 0.08524421998045
    },
    {
      "epoch": 3.04,
      "grad_norm": 1.2125544575036498,
      "learning_rate": 4.838868627666191e-06,
      "loss": 0.1309,
      "sft_loss": 0.0139304670272395,
      "step": 510,
      "total_loss": 0.015479093678698064,
      "value_loss": 0.01548626560065145,
      "value_loss_search": 0.04746301869455181,
      "value_loss_thought": 0.0764271073458076
    },
    {
      "epoch": 3.07,
      "grad_norm": 2.334834842181477,
      "learning_rate": 4.8326583018355325e-06,
      "loss": 0.1387,
      "sft_loss": 0.017326009750831872,
      "step": 515,
      "total_loss": 0.018803953268570695,
      "value_loss": 0.014779433516605423,
      "value_loss_search": 0.04713020703025848,
      "value_loss_thought": 0.07110526093983935
    },
    {
      "epoch": 3.1,
      "grad_norm": 1.5097098181886008,
      "learning_rate": 4.826334688685895e-06,
      "loss": 0.1436,
      "sft_loss": 0.015178992017172276,
      "step": 520,
      "total_loss": 0.01744569096995292,
      "value_loss": 0.022666989314620876,
      "value_loss_search": 0.056544199488735104,
      "value_loss_thought": 0.12479171633535771
    },
    {
      "epoch": 3.13,
      "grad_norm": 1.3535205448949577,
      "learning_rate": 4.819898095328346e-06,
      "loss": 0.1298,
      "sft_loss": 0.012230370636098086,
      "step": 525,
      "total_loss": 0.013584458573981805,
      "value_loss": 0.013540879723450416,
      "value_loss_search": 0.05891674056322245,
      "value_loss_thought": 0.04941029600959155
    },
    {
      "epoch": 3.16,
      "grad_norm": 1.3125271270813312,
      "learning_rate": 4.8133488343609225e-06,
      "loss": 0.1235,
      "sft_loss": 0.011256256676279008,
      "step": 530,
      "total_loss": 0.01235277638277239,
      "value_loss": 0.010965196802611387,
      "value_loss_search": 0.03379480599472799,
      "value_loss_thought": 0.05392676859810308
    },
    {
      "epoch": 3.19,
      "grad_norm": 1.2773968571118655,
      "learning_rate": 4.8066872238534475e-06,
      "loss": 0.1315,
      "sft_loss": 0.015837551723234356,
      "step": 535,
      "total_loss": 0.01701334770821745,
      "value_loss": 0.011757960020167957,
      "value_loss_search": 0.04275373071531021,
      "value_loss_thought": 0.05130994958924475
    },
    {
      "epoch": 3.22,
      "grad_norm": 2.250161610850135,
      "learning_rate": 4.799913587332087e-06,
      "loss": 0.1336,
      "sft_loss": 0.01434246387798339,
      "step": 540,
      "total_loss": 0.017333269701339304,
      "value_loss": 0.02990805795383835,
      "value_loss_search": 0.1112344439959088,
      "value_loss_thought": 0.1280300162234198
    },
    {
      "epoch": 3.25,
      "grad_norm": 1.7005049809514607,
      "learning_rate": 4.793028253763633e-06,
      "loss": 0.1382,
      "sft_loss": 0.017506125732325017,
      "step": 545,
      "total_loss": 0.018909330308576954,
      "value_loss": 0.014032045881458543,
      "value_loss_search": 0.04037052274323969,
      "value_loss_thought": 0.0718858446669401
    },
    {
      "epoch": 3.28,
      "grad_norm": 0.7583090366866635,
      "learning_rate": 4.786031557539532e-06,
      "loss": 0.1302,
      "sft_loss": 0.01899501702282578,
      "step": 550,
      "total_loss": 0.020867432708973864,
      "value_loss": 0.018724157162853316,
      "value_loss_search": 0.0525303708315505,
      "value_loss_thought": 0.09726288783901964
    },
    {
      "epoch": 3.31,
      "grad_norm": 0.6780107958188984,
      "learning_rate": 4.7789238384596394e-06,
      "loss": 0.1332,
      "sft_loss": 0.01216251152800396,
      "step": 555,
      "total_loss": 0.013824499693544112,
      "value_loss": 0.016619880075199944,
      "value_loss_search": 0.043466050122447086,
      "value_loss_thought": 0.08949299015621363
    },
    {
      "epoch": 3.34,
      "grad_norm": 1.186243776959505,
      "learning_rate": 4.771705441715722e-06,
      "loss": 0.1337,
      "sft_loss": 0.012400240125134587,
      "step": 560,
      "total_loss": 0.015292362061109089,
      "value_loss": 0.02892121910044807,
      "value_loss_search": 0.07066335178838017,
      "value_loss_thought": 0.160706402354117
    },
    {
      "epoch": 3.37,
      "grad_norm": 0.8074975390166714,
      "learning_rate": 4.76437671787469e-06,
      "loss": 0.1276,
      "sft_loss": 0.013388467975892127,
      "step": 565,
      "total_loss": 0.014532541646792652,
      "value_loss": 0.011440736869690226,
      "value_loss_search": 0.042682257217265374,
      "value_loss_thought": 0.04884363801356813
    },
    {
      "epoch": 3.4,
      "grad_norm": 1.1536407465687832,
      "learning_rate": 4.756938022861575e-06,
      "loss": 0.1361,
      "sft_loss": 0.014692733390256762,
      "step": 570,
      "total_loss": 0.016363070430702464,
      "value_loss": 0.01670336974376596,
      "value_loss_search": 0.05988950296840585,
      "value_loss_thought": 0.07373745448749106
    },
    {
      "epoch": 3.43,
      "grad_norm": 1.296361708786197,
      "learning_rate": 4.7493897179422366e-06,
      "loss": 0.1262,
      "sft_loss": 0.014538243343122304,
      "step": 575,
      "total_loss": 0.016324132579029538,
      "value_loss": 0.017858891680498347,
      "value_loss_search": 0.043000358385506844,
      "value_loss_thought": 0.09987077550104004
    },
    {
      "epoch": 3.46,
      "grad_norm": 0.7473511902604624,
      "learning_rate": 4.741732169705829e-06,
      "loss": 0.128,
      "sft_loss": 0.01885277854744345,
      "step": 580,
      "total_loss": 0.020614391867351856,
      "value_loss": 0.01761613485025464,
      "value_loss_search": 0.07391770112133145,
      "value_loss_thought": 0.06701137741147249
    },
    {
      "epoch": 3.49,
      "grad_norm": 0.8672128287076202,
      "learning_rate": 4.733965750046987e-06,
      "loss": 0.1306,
      "sft_loss": 0.011551224719733,
      "step": 585,
      "total_loss": 0.012892319760794635,
      "value_loss": 0.013410949158787844,
      "value_loss_search": 0.044143242872939935,
      "value_loss_thought": 0.06314435001149832
    },
    {
      "epoch": 3.52,
      "grad_norm": 1.1458177098839266,
      "learning_rate": 4.72609083614777e-06,
      "loss": 0.1292,
      "sft_loss": 0.014341436792165041,
      "step": 590,
      "total_loss": 0.015776935562007564,
      "value_loss": 0.014354987322531088,
      "value_loss_search": 0.05364567942300482,
      "value_loss_thought": 0.06119421870671431
    },
    {
      "epoch": 3.55,
      "grad_norm": 0.9268770278205384,
      "learning_rate": 4.71810781045934e-06,
      "loss": 0.1297,
      "sft_loss": 0.015257505606859923,
      "step": 595,
      "total_loss": 0.017370533025075473,
      "value_loss": 0.02113027337927633,
      "value_loss_search": 0.059170720773727224,
      "value_loss_thought": 0.1098714638722413
    },
    {
      "epoch": 3.58,
      "grad_norm": 0.868085256277253,
      "learning_rate": 4.710017060683396e-06,
      "loss": 0.1425,
      "sft_loss": 0.01423517488874495,
      "step": 600,
      "total_loss": 0.01731622647575932,
      "value_loss": 0.03081051432359345,
      "value_loss_search": 0.0980118739993486,
      "value_loss_thought": 0.14847224191835268
    },
    {
      "epoch": 3.61,
      "grad_norm": 1.0537209668511847,
      "learning_rate": 4.701818979753331e-06,
      "loss": 0.1321,
      "sft_loss": 0.014892896311357618,
      "step": 605,
      "total_loss": 0.016185236236145782,
      "value_loss": 0.012923399927421997,
      "value_loss_search": 0.04884319881856527,
      "value_loss_thought": 0.05454400067619645
    },
    {
      "epoch": 3.64,
      "grad_norm": 0.8700570072330364,
      "learning_rate": 4.693513965815163e-06,
      "loss": 0.1508,
      "sft_loss": 0.019580099708400668,
      "step": 610,
      "total_loss": 0.02092106218860863,
      "value_loss": 0.013409624124460607,
      "value_loss_search": 0.04490554911869822,
      "value_loss_thought": 0.06237144449660263
    },
    {
      "epoch": 3.67,
      "grad_norm": 1.2788774174216047,
      "learning_rate": 4.6851024222081905e-06,
      "loss": 0.1375,
      "sft_loss": 0.014192894287407398,
      "step": 615,
      "total_loss": 0.015467491799489608,
      "value_loss": 0.012745974849167396,
      "value_loss_search": 0.05650571903670425,
      "value_loss_thought": 0.045462077876436524
    },
    {
      "epoch": 3.7,
      "grad_norm": 1.1384751682462817,
      "learning_rate": 4.676584757445406e-06,
      "loss": 0.1391,
      "sft_loss": 0.01579129050951451,
      "step": 620,
      "total_loss": 0.017925446468007068,
      "value_loss": 0.021341558844778775,
      "value_loss_search": 0.04930673633061815,
      "value_loss_thought": 0.12142573465496298
    },
    {
      "epoch": 3.73,
      "grad_norm": 0.9386975353990433,
      "learning_rate": 4.667961385193656e-06,
      "loss": 0.1358,
      "sft_loss": 0.01762553579173982,
      "step": 625,
      "total_loss": 0.01908148711366948,
      "value_loss": 0.014559512600328617,
      "value_loss_search": 0.0564095867106289,
      "value_loss_thought": 0.06006651317602518
    },
    {
      "epoch": 3.76,
      "grad_norm": 1.09540391640531,
      "learning_rate": 4.659232724253553e-06,
      "loss": 0.1426,
      "sft_loss": 0.012116698501631617,
      "step": 630,
      "total_loss": 0.013968724870528604,
      "value_loss": 0.01852026394344648,
      "value_loss_search": 0.037771890809654,
      "value_loss_thought": 0.11039022141931128
    },
    {
      "epoch": 3.79,
      "grad_norm": 1.1403346821597187,
      "learning_rate": 4.650399198539132e-06,
      "loss": 0.14,
      "sft_loss": 0.017072572163306175,
      "step": 635,
      "total_loss": 0.018839041521141554,
      "value_loss": 0.017664692708285655,
      "value_loss_search": 0.0408900124222555,
      "value_loss_thought": 0.10042752822591865
    },
    {
      "epoch": 3.82,
      "grad_norm": 0.8573467809405049,
      "learning_rate": 4.641461237057267e-06,
      "loss": 0.1402,
      "sft_loss": 0.01425269797910005,
      "step": 640,
      "total_loss": 0.01625731455909545,
      "value_loss": 0.020046164246105037,
      "value_loss_search": 0.06718737064234119,
      "value_loss_thought": 0.09318194256302377
    },
    {
      "epoch": 3.85,
      "grad_norm": 0.6970362791399626,
      "learning_rate": 4.632419273886835e-06,
      "loss": 0.1257,
      "sft_loss": 0.013124658446758985,
      "step": 645,
      "total_loss": 0.014949945608714188,
      "value_loss": 0.01825287001252036,
      "value_loss_search": 0.047629779761018655,
      "value_loss_thought": 0.09839317841679077
    },
    {
      "epoch": 3.88,
      "grad_norm": 1.0570192724917926,
      "learning_rate": 4.62327374815763e-06,
      "loss": 0.1318,
      "sft_loss": 0.014157434459775686,
      "step": 650,
      "total_loss": 0.016517047015622666,
      "value_loss": 0.023596125619087617,
      "value_loss_search": 0.06047542081159918,
      "value_loss_thought": 0.12829358246017364
    },
    {
      "epoch": 3.91,
      "grad_norm": 1.4519497262292758,
      "learning_rate": 4.614025104029046e-06,
      "loss": 0.1304,
      "sft_loss": 0.015446552366483957,
      "step": 655,
      "total_loss": 0.017752421715067614,
      "value_loss": 0.02305869280771731,
      "value_loss_search": 0.05821381665200533,
      "value_loss_thought": 0.12625572479846597
    },
    {
      "epoch": 3.94,
      "grad_norm": 1.4038621587755458,
      "learning_rate": 4.6046737906684955e-06,
      "loss": 0.1222,
      "sft_loss": 0.013910213205963372,
      "step": 660,
      "total_loss": 0.016714950121104265,
      "value_loss": 0.028047367616863994,
      "value_loss_search": 0.07310631623822701,
      "value_loss_thought": 0.15127262340945435
    },
    {
      "epoch": 3.97,
      "grad_norm": 1.5045007836682485,
      "learning_rate": 4.5952202622296015e-06,
      "loss": 0.1354,
      "sft_loss": 0.013794716726988555,
      "step": 665,
      "total_loss": 0.015795043228149553,
      "value_loss": 0.020003264624756413,
      "value_loss_search": 0.0463893650089517,
      "value_loss_thought": 0.11363675205575419
    },
    {
      "epoch": 4.0,
      "grad_norm": 1.1345147085500709,
      "learning_rate": 4.585664977830142e-06,
      "loss": 0.1411,
      "sft_loss": 0.01729184603318572,
      "step": 670,
      "total_loss": 0.01970745405710659,
      "value_loss": 0.024156079764179595,
      "value_loss_search": 0.04846977311616456,
      "value_loss_thought": 0.14477886538206802
    },
    {
      "epoch": 4.03,
      "grad_norm": 0.7700098195196915,
      "learning_rate": 4.576008401529746e-06,
      "loss": 0.0925,
      "sft_loss": 0.00861083798808977,
      "step": 675,
      "total_loss": 0.009520014304735014,
      "value_loss": 0.009091763565447764,
      "value_loss_search": 0.02745309268981373,
      "value_loss_thought": 0.04528101538708142
    },
    {
      "epoch": 4.06,
      "grad_norm": 1.2543052602388458,
      "learning_rate": 4.566251002307363e-06,
      "loss": 0.0957,
      "sft_loss": 0.009448495891410858,
      "step": 680,
      "total_loss": 0.010420851569779187,
      "value_loss": 0.00972355670351135,
      "value_loss_search": 0.043272700070008344,
      "value_loss_thought": 0.03451575428043725
    },
    {
      "epoch": 4.09,
      "grad_norm": 0.7516221051255656,
      "learning_rate": 4.556393254038486e-06,
      "loss": 0.0866,
      "sft_loss": 0.00772422740701586,
      "step": 685,
      "total_loss": 0.008484165449101511,
      "value_loss": 0.007599379828752717,
      "value_loss_search": 0.028828184264500577,
      "value_loss_thought": 0.03196685446855554
    },
    {
      "epoch": 4.12,
      "grad_norm": 0.9845886373213341,
      "learning_rate": 4.546435635472133e-06,
      "loss": 0.0995,
      "sft_loss": 0.0058452394208870825,
      "step": 690,
      "total_loss": 0.006937743502530225,
      "value_loss": 0.010925040280527298,
      "value_loss_search": 0.038594244975945456,
      "value_loss_thought": 0.04880607676673208
    },
    {
      "epoch": 4.15,
      "grad_norm": 0.9063088407123128,
      "learning_rate": 4.536378630207598e-06,
      "loss": 0.0863,
      "sft_loss": 0.009303204133175313,
      "step": 695,
      "total_loss": 0.010333730792467577,
      "value_loss": 0.01030526671976304,
      "value_loss_search": 0.04375397186494183,
      "value_loss_thought": 0.038688162007019855
    },
    {
      "epoch": 4.18,
      "grad_norm": 0.6582700642646613,
      "learning_rate": 4.526222726670966e-06,
      "loss": 0.0954,
      "sft_loss": 0.011040500248782337,
      "step": 700,
      "total_loss": 0.01200565958547486,
      "value_loss": 0.009651594169963574,
      "value_loss_search": 0.03989513861957903,
      "value_loss_thought": 0.037317614558742204
    },
    {
      "epoch": 4.21,
      "grad_norm": 0.743159814030366,
      "learning_rate": 4.515968418091394e-06,
      "loss": 0.0899,
      "sft_loss": 0.010487070470117033,
      "step": 705,
      "total_loss": 0.011825196649658665,
      "value_loss": 0.013381260839241804,
      "value_loss_search": 0.036946902065199086,
      "value_loss_thought": 0.07010318333050236
    },
    {
      "epoch": 4.24,
      "grad_norm": 0.9990248193841446,
      "learning_rate": 4.505616202477152e-06,
      "loss": 0.1002,
      "sft_loss": 0.009330611897166819,
      "step": 710,
      "total_loss": 0.009855728735766433,
      "value_loss": 0.0052511682242766256,
      "value_loss_search": 0.02117164042309696,
      "value_loss_thought": 0.020837705221492798
    },
    {
      "epoch": 4.27,
      "grad_norm": 0.7065067498607398,
      "learning_rate": 4.49516658259144e-06,
      "loss": 0.1018,
      "sft_loss": 0.013342563062906265,
      "step": 715,
      "total_loss": 0.014073112735923132,
      "value_loss": 0.0073054967775533445,
      "value_loss_search": 0.023805043454274255,
      "value_loss_thought": 0.03463893002911504
    },
    {
      "epoch": 4.3,
      "grad_norm": 0.6847509331487238,
      "learning_rate": 4.48462006592797e-06,
      "loss": 0.0993,
      "sft_loss": 0.010668217262718827,
      "step": 720,
      "total_loss": 0.011838797037410132,
      "value_loss": 0.011705797309559784,
      "value_loss_search": 0.047429115066006486,
      "value_loss_thought": 0.046217263146081676
    },
    {
      "epoch": 4.33,
      "grad_norm": 0.6685576496831016,
      "learning_rate": 4.473977164686321e-06,
      "loss": 0.0944,
      "sft_loss": 0.008604107843711972,
      "step": 725,
      "total_loss": 0.009628339805681207,
      "value_loss": 0.010242320024417495,
      "value_loss_search": 0.035196531142617005,
      "value_loss_thought": 0.04674202849655558
    },
    {
      "epoch": 4.36,
      "grad_norm": 0.75658777055452,
      "learning_rate": 4.46323839574706e-06,
      "loss": 0.1034,
      "sft_loss": 0.010033530904911459,
      "step": 730,
      "total_loss": 0.011016872639970643,
      "value_loss": 0.009833417008576362,
      "value_loss_search": 0.02881108484938295,
      "value_loss_thought": 0.04985625083095328
    },
    {
      "epoch": 4.39,
      "grad_norm": 0.688131213767825,
      "learning_rate": 4.45240428064664e-06,
      "loss": 0.095,
      "sft_loss": 0.013836181082297117,
      "step": 735,
      "total_loss": 0.014805314792602076,
      "value_loss": 0.009691335982915916,
      "value_loss_search": 0.036203794245648166,
      "value_loss_thought": 0.04132689285597735
    },
    {
      "epoch": 4.42,
      "grad_norm": 0.7096504880990531,
      "learning_rate": 4.4414753455520795e-06,
      "loss": 0.0982,
      "sft_loss": 0.009608610440045596,
      "step": 740,
      "total_loss": 0.010440762112193625,
      "value_loss": 0.00832151709196296,
      "value_loss_search": 0.029671950675117388,
      "value_loss_thought": 0.036900186177172146
    },
    {
      "epoch": 4.45,
      "grad_norm": 0.6217612986336695,
      "learning_rate": 4.430452121235396e-06,
      "loss": 0.0971,
      "sft_loss": 0.012536874134093523,
      "step": 745,
      "total_loss": 0.014267633290864978,
      "value_loss": 0.017307592170084263,
      "value_loss_search": 0.034450266263445425,
      "value_loss_thought": 0.10401047060558995
    },
    {
      "epoch": 4.48,
      "grad_norm": 0.797630098293788,
      "learning_rate": 4.419335143047834e-06,
      "loss": 0.1029,
      "sft_loss": 0.014243904023896903,
      "step": 750,
      "total_loss": 0.015265003470494776,
      "value_loss": 0.010210994727276556,
      "value_loss_search": 0.032982522548752516,
      "value_loss_thought": 0.048705436136879145
    },
    {
      "epoch": 4.51,
      "grad_norm": 0.6961134075635874,
      "learning_rate": 4.408124950893868e-06,
      "loss": 0.095,
      "sft_loss": 0.012445987621322274,
      "step": 755,
      "total_loss": 0.01341653695722016,
      "value_loss": 0.009705493019964706,
      "value_loss_search": 0.03983713596752523,
      "value_loss_thought": 0.03780680882375691
    },
    {
      "epoch": 4.54,
      "grad_norm": 0.5762572982567685,
      "learning_rate": 4.396822089204981e-06,
      "loss": 0.0998,
      "sft_loss": 0.011862749280408025,
      "step": 760,
      "total_loss": 0.013249826465107617,
      "value_loss": 0.013870771408619476,
      "value_loss_search": 0.0406148220283626,
      "value_loss_thought": 0.07035134897487297
    },
    {
      "epoch": 4.57,
      "grad_norm": 0.7824308185518036,
      "learning_rate": 4.3854271069132195e-06,
      "loss": 0.1015,
      "sft_loss": 0.012830629444215447,
      "step": 765,
      "total_loss": 0.01348854236812258,
      "value_loss": 0.006579129521428228,
      "value_loss_search": 0.02838464450221636,
      "value_loss_thought": 0.024248391312721652
    },
    {
      "epoch": 4.6,
      "grad_norm": 1.115871263491779,
      "learning_rate": 4.373940557424537e-06,
      "loss": 0.0929,
      "sft_loss": 0.013013543572742491,
      "step": 770,
      "total_loss": 0.014466597687578542,
      "value_loss": 0.014530540375199052,
      "value_loss_search": 0.044251333865793184,
      "value_loss_thought": 0.07199298751397691
    },
    {
      "epoch": 4.63,
      "grad_norm": 0.5639360855348526,
      "learning_rate": 4.36236299859192e-06,
      "loss": 0.0942,
      "sft_loss": 0.010940996720455587,
      "step": 775,
      "total_loss": 0.01194770065769717,
      "value_loss": 0.010067038443867204,
      "value_loss_search": 0.032814532905274515,
      "value_loss_thought": 0.04772177415952683
    },
    {
      "epoch": 4.65,
      "grad_norm": 1.1373613205181072,
      "learning_rate": 4.350694992688289e-06,
      "loss": 0.1003,
      "sft_loss": 0.009771948284469544,
      "step": 780,
      "total_loss": 0.010786435697650632,
      "value_loss": 0.010144873889385054,
      "value_loss_search": 0.04210830284959002,
      "value_loss_thought": 0.03905068875028519
    },
    {
      "epoch": 4.68,
      "grad_norm": 0.7728548735492248,
      "learning_rate": 4.338937106379199e-06,
      "loss": 0.0986,
      "sft_loss": 0.01460006288252771,
      "step": 785,
      "total_loss": 0.015538786767115198,
      "value_loss": 0.009387238657767227,
      "value_loss_search": 0.03280571626974051,
      "value_loss_thought": 0.04229219339322299
    },
    {
      "epoch": 4.71,
      "grad_norm": 0.8732356620640505,
      "learning_rate": 4.32708991069531e-06,
      "loss": 0.0941,
      "sft_loss": 0.009909180679824204,
      "step": 790,
      "total_loss": 0.01077013838946641,
      "value_loss": 0.0086095773167699,
      "value_loss_search": 0.0318576935078454,
      "value_loss_thought": 0.03701892458407201
| }, | |
| { | |
| "epoch": 4.74, | |
| "grad_norm": 1.143053369270145, | |
| "learning_rate": 4.315153981004666e-06, | |
| "loss": 0.0966, | |
| "sft_loss": 0.009806250128895045, | |
| "step": 795, | |
| "total_loss": 0.011583962598513154, | |
| "value_loss": 0.017777125288466776, | |
| "value_loss_search": 0.040363578098720154, | |
| "value_loss_thought": 0.1018534237449785 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "grad_norm": 0.7780179009340498, | |
| "learning_rate": 4.3031298969847406e-06, | |
| "loss": 0.1002, | |
| "sft_loss": 0.009359034500084818, | |
| "step": 800, | |
| "total_loss": 0.010968359952437367, | |
| "value_loss": 0.016093254049246754, | |
| "value_loss_search": 0.05174213330789144, | |
| "value_loss_thought": 0.07700389907186037 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.7183429149076678, | |
| "learning_rate": 4.29101824259429e-06, | |
| "loss": 0.1018, | |
| "sft_loss": 0.009059342555701733, | |
| "step": 805, | |
| "total_loss": 0.010009716682918678, | |
| "value_loss": 0.009503741757498574, | |
| "value_loss_search": 0.03197408163345017, | |
| "value_loss_thought": 0.0440558526033783 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "grad_norm": 0.6507483126573996, | |
| "learning_rate": 4.2788196060449925e-06, | |
| "loss": 0.1002, | |
| "sft_loss": 0.010762089781928807, | |
| "step": 810, | |
| "total_loss": 0.012873523510279483, | |
| "value_loss": 0.021114336799837475, | |
| "value_loss_search": 0.04117565880492293, | |
| "value_loss_thought": 0.12773903450543003 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "grad_norm": 0.8117830572861622, | |
| "learning_rate": 4.266534579772881e-06, | |
| "loss": 0.0998, | |
| "sft_loss": 0.016996108298189937, | |
| "step": 815, | |
| "total_loss": 0.01824291221478802, | |
| "value_loss": 0.012468039073382897, | |
| "value_loss_search": 0.026130347241348773, | |
| "value_loss_thought": 0.07361396466958467 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "grad_norm": 0.9626098886691061, | |
| "learning_rate": 4.254163760409571e-06, | |
| "loss": 0.1041, | |
| "sft_loss": 0.01157297370955348, | |
| "step": 820, | |
| "total_loss": 0.012655413702026408, | |
| "value_loss": 0.01082440062873502, | |
| "value_loss_search": 0.029132241165916638, | |
| "value_loss_thought": 0.057462964772071246 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 0.6101284730372293, | |
| "learning_rate": 4.2417077487532835e-06, | |
| "loss": 0.0917, | |
| "sft_loss": 0.00924121611751616, | |
| "step": 825, | |
| "total_loss": 0.010044124071836791, | |
| "value_loss": 0.008029079741447731, | |
| "value_loss_search": 0.031784425488490343, | |
| "value_loss_thought": 0.032448212833514845 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "grad_norm": 0.8024833457256252, | |
| "learning_rate": 4.229167149739667e-06, | |
| "loss": 0.094, | |
| "sft_loss": 0.00918948817998171, | |
| "step": 830, | |
| "total_loss": 0.010426052451214219, | |
| "value_loss": 0.01236564262903812, | |
| "value_loss_search": 0.03679989460033539, | |
| "value_loss_thought": 0.062125246099412834 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "grad_norm": 1.1622516088977335, | |
| "learning_rate": 4.216542572412423e-06, | |
| "loss": 0.0952, | |
| "sft_loss": 0.007891789707355202, | |
| "step": 835, | |
| "total_loss": 0.00946709308821454, | |
| "value_loss": 0.015753033644841707, | |
| "value_loss_search": 0.03111598624600447, | |
| "value_loss_thought": 0.0949082830469706 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "grad_norm": 0.6908442768231199, | |
| "learning_rate": 4.203834629893719e-06, | |
| "loss": 0.0811, | |
| "sft_loss": 0.007326198380906135, | |
| "step": 840, | |
| "total_loss": 0.008643819743883795, | |
| "value_loss": 0.013176213806036684, | |
| "value_loss_search": 0.05124346678378515, | |
| "value_loss_thought": 0.0541662441482913 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "grad_norm": 0.7901490430318995, | |
| "learning_rate": 4.19104393935442e-06, | |
| "loss": 0.0662, | |
| "sft_loss": 0.007722388836555183, | |
| "step": 845, | |
| "total_loss": 0.00843456873717514, | |
| "value_loss": 0.0071217986454485075, | |
| "value_loss_search": 0.028109038909803985, | |
| "value_loss_thought": 0.028865350570777083 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "grad_norm": 1.0101019251398367, | |
| "learning_rate": 4.178171121984109e-06, | |
| "loss": 0.0699, | |
| "sft_loss": 0.008467405906412751, | |
| "step": 850, | |
| "total_loss": 0.008816118605045631, | |
| "value_loss": 0.003487127016751401, | |
| "value_loss_search": 0.013744208114383127, | |
| "value_loss_thought": 0.014152807989376015 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "grad_norm": 0.653401555518481, | |
| "learning_rate": 4.16521680296092e-06, | |
| "loss": 0.067, | |
| "sft_loss": 0.010541580687277018, | |
| "step": 855, | |
| "total_loss": 0.011391171138998856, | |
| "value_loss": 0.008495904014353073, | |
| "value_loss_search": 0.02280127693209124, | |
| "value_loss_thought": 0.045165955742459116 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "grad_norm": 0.8573274418097468, | |
| "learning_rate": 4.152181611421179e-06, | |
| "loss": 0.0607, | |
| "sft_loss": 0.00878625299083069, | |
| "step": 860, | |
| "total_loss": 0.009619847631120138, | |
| "value_loss": 0.00833594629189065, | |
| "value_loss_search": 0.02539439773918275, | |
| "value_loss_thought": 0.04129317272327171 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "grad_norm": 0.6170985982380647, | |
| "learning_rate": 4.139066180428846e-06, | |
| "loss": 0.0655, | |
| "sft_loss": 0.009596668492304162, | |
| "step": 865, | |
| "total_loss": 0.010120830915363399, | |
| "value_loss": 0.005241623797928696, | |
| "value_loss_search": 0.021597909021647866, | |
| "value_loss_thought": 0.020335081457960768 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "grad_norm": 0.825156846628994, | |
| "learning_rate": 4.125871146944771e-06, | |
| "loss": 0.0695, | |
| "sft_loss": 0.010449141904246062, | |
| "step": 870, | |
| "total_loss": 0.011130747148763475, | |
| "value_loss": 0.006816053235161235, | |
| "value_loss_search": 0.02648081009813268, | |
| "value_loss_thought": 0.028047615623654563 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "grad_norm": 0.5819547422187984, | |
| "learning_rate": 4.112597151795758e-06, | |
| "loss": 0.063, | |
| "sft_loss": 0.00804176195524633, | |
| "step": 875, | |
| "total_loss": 0.009046485570181062, | |
| "value_loss": 0.010047235474598893, | |
| "value_loss_search": 0.022064273892272012, | |
| "value_loss_thought": 0.05831360963654788 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "grad_norm": 0.8024901403445527, | |
| "learning_rate": 4.099244839643448e-06, | |
| "loss": 0.0589, | |
| "sft_loss": 0.007425288169179112, | |
| "step": 880, | |
| "total_loss": 0.007866452395560409, | |
| "value_loss": 0.004411642082025935, | |
| "value_loss_search": 0.01583855556901881, | |
| "value_loss_thought": 0.019454581364698242 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "grad_norm": 0.6299816489940798, | |
| "learning_rate": 4.085814858953001e-06, | |
| "loss": 0.0707, | |
| "sft_loss": 0.010274743323680013, | |
| "step": 885, | |
| "total_loss": 0.011220432494735633, | |
| "value_loss": 0.009456892390926442, | |
| "value_loss_search": 0.017595245018355854, | |
| "value_loss_thought": 0.05805989508698985 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "grad_norm": 0.5775443078383485, | |
| "learning_rate": 4.072307861961614e-06, | |
| "loss": 0.0668, | |
| "sft_loss": 0.007527518505230546, | |
| "step": 890, | |
| "total_loss": 0.008477710493883706, | |
| "value_loss": 0.00950191973965957, | |
| "value_loss_search": 0.03369882960868278, | |
| "value_loss_thought": 0.042316528412220576 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "grad_norm": 0.7217258713196177, | |
| "learning_rate": 4.058724504646834e-06, | |
| "loss": 0.0695, | |
| "sft_loss": 0.008143483952153474, | |
| "step": 895, | |
| "total_loss": 0.009240032660204633, | |
| "value_loss": 0.01096548721779982, | |
| "value_loss_search": 0.03710676123126859, | |
| "value_loss_thought": 0.050617136721757564 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "grad_norm": 0.7509981121776955, | |
| "learning_rate": 4.045065446694709e-06, | |
| "loss": 0.0623, | |
| "sft_loss": 0.009156511997571216, | |
| "step": 900, | |
| "total_loss": 0.009822533285591817, | |
| "value_loss": 0.006660212647693698, | |
| "value_loss_search": 0.02795750253162623, | |
| "value_loss_thought": 0.02532419814169771 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "grad_norm": 0.5391534216473769, | |
| "learning_rate": 4.031331351467744e-06, | |
| "loss": 0.0693, | |
| "sft_loss": 0.006840780581114814, | |
| "step": 905, | |
| "total_loss": 0.007346747693952693, | |
| "value_loss": 0.005059671220953988, | |
| "value_loss_search": 0.023152905100801036, | |
| "value_loss_thought": 0.017324464485159296 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "grad_norm": 0.8478085328901763, | |
| "learning_rate": 4.017522885972687e-06, | |
| "loss": 0.066, | |
| "sft_loss": 0.00748998821945861, | |
| "step": 910, | |
| "total_loss": 0.008190430112915693, | |
| "value_loss": 0.007004418966835147, | |
| "value_loss_search": 0.0241462381236488, | |
| "value_loss_thought": 0.03188911370725691 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "grad_norm": 0.6273190350109188, | |
| "learning_rate": 4.0036407208281335e-06, | |
| "loss": 0.0642, | |
| "sft_loss": 0.007753341854549944, | |
| "step": 915, | |
| "total_loss": 0.008310681724867663, | |
| "value_loss": 0.0055733984515427435, | |
| "value_loss_search": 0.017653270972687096, | |
| "value_loss_thought": 0.026933916354755637 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "grad_norm": 0.6275397576143824, | |
| "learning_rate": 3.989685530231958e-06, | |
| "loss": 0.0723, | |
| "sft_loss": 0.008013604581356049, | |
| "step": 920, | |
| "total_loss": 0.008786806087823607, | |
| "value_loss": 0.007732015183682961, | |
| "value_loss_search": 0.018130889449082588, | |
| "value_loss_thought": 0.04372523128440662 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "grad_norm": 0.6557274591871389, | |
| "learning_rate": 3.975657991928573e-06, | |
| "loss": 0.0654, | |
| "sft_loss": 0.007132729375734925, | |
| "step": 925, | |
| "total_loss": 0.0075831895907356285, | |
| "value_loss": 0.004504602500765032, | |
| "value_loss_search": 0.017856063829344748, | |
| "value_loss_thought": 0.018180756335345903 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "grad_norm": 0.6502931359394508, | |
| "learning_rate": 3.961558787176012e-06, | |
| "loss": 0.0702, | |
| "sft_loss": 0.0077712137601338325, | |
| "step": 930, | |
| "total_loss": 0.008426044349789663, | |
| "value_loss": 0.006548305749731753, | |
| "value_loss_search": 0.020701198827305235, | |
| "value_loss_thought": 0.03168524749562494 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "grad_norm": 0.5593737214026034, | |
| "learning_rate": 3.9473886007128424e-06, | |
| "loss": 0.0674, | |
| "sft_loss": 0.0070376997464336455, | |
| "step": 935, | |
| "total_loss": 0.007807116948492876, | |
| "value_loss": 0.007694172377568975, | |
| "value_loss_search": 0.02574841559246579, | |
| "value_loss_thought": 0.03580496397080424 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "grad_norm": 0.5565142984447847, | |
| "learning_rate": 3.933148120724913e-06, | |
| "loss": 0.0695, | |
| "sft_loss": 0.005548381910193712, | |
| "step": 940, | |
| "total_loss": 0.006512936933782498, | |
| "value_loss": 0.009645550738008523, | |
| "value_loss_search": 0.02102770657133988, | |
| "value_loss_thought": 0.056136698765476466 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "grad_norm": 0.6653147056652764, | |
| "learning_rate": 3.9188380388119325e-06, | |
| "loss": 0.075, | |
| "sft_loss": 0.009176643792307005, | |
| "step": 945, | |
| "total_loss": 0.010155323132971716, | |
| "value_loss": 0.009786792815066291, | |
| "value_loss_search": 0.02649424351284324, | |
| "value_loss_thought": 0.05180009940095261 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "grad_norm": 0.4898734764580984, | |
| "learning_rate": 3.904459049953877e-06, | |
| "loss": 0.0661, | |
| "sft_loss": 0.008050526608712971, | |
| "step": 950, | |
| "total_loss": 0.008603187052369777, | |
| "value_loss": 0.005526604052067796, | |
| "value_loss_search": 0.02066867913152919, | |
| "value_loss_thought": 0.0235441530123353 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "grad_norm": 0.7610416619124152, | |
| "learning_rate": 3.890011852477243e-06, | |
| "loss": 0.0712, | |
| "sft_loss": 0.00837269393960014, | |
| "step": 955, | |
| "total_loss": 0.009244194875520861, | |
| "value_loss": 0.00871500901721447, | |
| "value_loss_search": 0.026370803010149758, | |
| "value_loss_thought": 0.04334926914202271 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "grad_norm": 0.7172771316565127, | |
| "learning_rate": 3.875497148021129e-06, | |
| "loss": 0.0726, | |
| "sft_loss": 0.008401849202346056, | |
| "step": 960, | |
| "total_loss": 0.008891117146237092, | |
| "value_loss": 0.004892679379145193, | |
| "value_loss_search": 0.01661159728510029, | |
| "value_loss_thought": 0.022529837636739103 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "grad_norm": 0.6480324222376628, | |
| "learning_rate": 3.860915641503161e-06, | |
| "loss": 0.0646, | |
| "sft_loss": 0.00682629911461845, | |
| "step": 965, | |
| "total_loss": 0.007262193315084176, | |
| "value_loss": 0.004358942487397144, | |
| "value_loss_search": 0.01821862360557134, | |
| "value_loss_thought": 0.016652916284147067 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "grad_norm": 0.6949624103946892, | |
| "learning_rate": 3.84626804108526e-06, | |
| "loss": 0.0719, | |
| "sft_loss": 0.008461356349289417, | |
| "step": 970, | |
| "total_loss": 0.009027346382336533, | |
| "value_loss": 0.005659900530326922, | |
| "value_loss_search": 0.019068497527393903, | |
| "value_loss_thought": 0.02621070666225478 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "grad_norm": 0.7622591624750499, | |
| "learning_rate": 3.831555058139244e-06, | |
| "loss": 0.0707, | |
| "sft_loss": 0.007402687979629263, | |
| "step": 975, | |
| "total_loss": 0.00789324305359287, | |
| "value_loss": 0.004905550461671737, | |
| "value_loss_search": 0.013050667314735165, | |
| "value_loss_thought": 0.02619373640912954 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "grad_norm": 0.6838250232406838, | |
| "learning_rate": 3.8167774072122854e-06, | |
| "loss": 0.0673, | |
| "sft_loss": 0.008353895461186766, | |
| "step": 980, | |
| "total_loss": 0.00888050429666123, | |
| "value_loss": 0.005266088167263661, | |
| "value_loss_search": 0.01670047964719288, | |
| "value_loss_thought": 0.025428225997166008 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "grad_norm": 0.9218213608030134, | |
| "learning_rate": 3.8019358059922052e-06, | |
| "loss": 0.0708, | |
| "sft_loss": 0.006913194921799004, | |
| "step": 985, | |
| "total_loss": 0.007807648131026213, | |
| "value_loss": 0.008944532042056608, | |
| "value_loss_search": 0.02053129872363115, | |
| "value_loss_thought": 0.05102495740206905 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "grad_norm": 0.7564910242846892, | |
| "learning_rate": 3.7870309752726185e-06, | |
| "loss": 0.064, | |
| "sft_loss": 0.0055543248075991866, | |
| "step": 990, | |
| "total_loss": 0.00604638959850945, | |
| "value_loss": 0.004920647482958884, | |
| "value_loss_search": 0.01775021549135545, | |
| "value_loss_thought": 0.021614964383616098 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "grad_norm": 0.7406424491645267, | |
| "learning_rate": 3.772063638917931e-06, | |
| "loss": 0.062, | |
| "sft_loss": 0.007381244131829589, | |
| "step": 995, | |
| "total_loss": 0.00787693980830113, | |
| "value_loss": 0.004956956215028186, | |
| "value_loss_search": 0.0186556116294355, | |
| "value_loss_thought": 0.02100003812029172 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "grad_norm": 0.8597100129196285, | |
| "learning_rate": 3.75703452382818e-06, | |
| "loss": 0.0686, | |
| "sft_loss": 0.0067865438759326935, | |
| "step": 1000, | |
| "total_loss": 0.0074210830740980786, | |
| "value_loss": 0.0063453914136630376, | |
| "value_loss_search": 0.020665711158505927, | |
| "value_loss_thought": 0.030097420751735625 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.6387120370362654, | |
| "learning_rate": 3.741944359903734e-06, | |
| "loss": 0.0685, | |
| "sft_loss": 0.007980260415934026, | |
| "step": 1005, | |
| "total_loss": 0.00841312516255357, | |
| "value_loss": 0.00432864762956342, | |
| "value_loss_search": 0.016856307840919273, | |
| "value_loss_thought": 0.017772873218109452 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "grad_norm": 0.47299058750162554, | |
| "learning_rate": 3.7267938800098454e-06, | |
| "loss": 0.0455, | |
| "sft_loss": 0.005139627197058872, | |
| "step": 1010, | |
| "total_loss": 0.005742018675823602, | |
| "value_loss": 0.006023914470279124, | |
| "value_loss_search": 0.014465152566535267, | |
| "value_loss_thought": 0.03372616275910332 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "grad_norm": 0.5769250479404755, | |
| "learning_rate": 3.7115838199410566e-06, | |
| "loss": 0.0442, | |
| "sft_loss": 0.004237775265937671, | |
| "step": 1015, | |
| "total_loss": 0.004536310447571168, | |
| "value_loss": 0.002985352237874395, | |
| "value_loss_search": 0.012497988520624403, | |
| "value_loss_thought": 0.01138482937376466 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "grad_norm": 0.5775093614029521, | |
| "learning_rate": 3.696314918385466e-06, | |
| "loss": 0.0455, | |
| "sft_loss": 0.004376189230242744, | |
| "step": 1020, | |
| "total_loss": 0.004725277596082833, | |
| "value_loss": 0.0034908839002582683, | |
| "value_loss_search": 0.012387420580819253, | |
| "value_loss_thought": 0.015539650371397328 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "grad_norm": 0.5743014343738564, | |
| "learning_rate": 3.680987916888855e-06, | |
| "loss": 0.0464, | |
| "sft_loss": 0.004472998692654074, | |
| "step": 1025, | |
| "total_loss": 0.004852331803667198, | |
| "value_loss": 0.0037933312117729655, | |
| "value_loss_search": 0.009115806162424179, | |
| "value_loss_thought": 0.02123084324521187 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "grad_norm": 1.0135045723056435, | |
| "learning_rate": 3.6656035598186717e-06, | |
| "loss": 0.0483, | |
| "sft_loss": 0.006532586639514193, | |
| "step": 1030, | |
| "total_loss": 0.007033583752149753, | |
| "value_loss": 0.005009971209847208, | |
| "value_loss_search": 0.01218861587973379, | |
| "value_loss_thought": 0.02789115408404541 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "grad_norm": 0.8265173915326937, | |
| "learning_rate": 3.650162594327881e-06, | |
| "loss": 0.0464, | |
| "sft_loss": 0.005671659158542753, | |
| "step": 1035, | |
| "total_loss": 0.006245543843522228, | |
| "value_loss": 0.005738847342217923, | |
| "value_loss_search": 0.015518051942945022, | |
| "value_loss_thought": 0.030392726854552167 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "grad_norm": 0.47962371612527804, | |
| "learning_rate": 3.634665770318678e-06, | |
| "loss": 0.0413, | |
| "sft_loss": 0.005972391797695309, | |
| "step": 1040, | |
| "total_loss": 0.006403001316243717, | |
| "value_loss": 0.004306095417541655, | |
| "value_loss_search": 0.016831279399025335, | |
| "value_loss_thought": 0.017617484057427645 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "grad_norm": 1.0086948671095952, | |
| "learning_rate": 3.619113840406071e-06, | |
| "loss": 0.0491, | |
| "sft_loss": 0.004732140112901106, | |
| "step": 1045, | |
| "total_loss": 0.005075448810612215, | |
| "value_loss": 0.003433087062830964, | |
| "value_loss_search": 0.009895986284152513, | |
| "value_loss_thought": 0.01756871009142742 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "grad_norm": 0.6224166386357451, | |
| "learning_rate": 3.6035075598813275e-06, | |
| "loss": 0.0508, | |
| "sft_loss": 0.005647319235140458, | |
| "step": 1050, | |
| "total_loss": 0.005981965384165733, | |
| "value_loss": 0.0033464612621173727, | |
| "value_loss_search": 0.014234105288312548, | |
| "value_loss_thought": 0.012537584816254822 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "grad_norm": 0.6803707206328181, | |
| "learning_rate": 3.587847686675293e-06, | |
| "loss": 0.0444, | |
| "sft_loss": 0.004154384031426162, | |
| "step": 1055, | |
| "total_loss": 0.0045537911174491795, | |
| "value_loss": 0.0039940711957683565, | |
| "value_loss_search": 0.012944002056576665, | |
| "value_loss_thought": 0.019008567334594773 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "grad_norm": 0.6488360833237377, | |
| "learning_rate": 3.572134981321582e-06, | |
| "loss": 0.0464, | |
| "sft_loss": 0.00495091185439378, | |
| "step": 1060, | |
| "total_loss": 0.005601490682847654, | |
| "value_loss": 0.006505788123034506, | |
| "value_loss_search": 0.00773236445022576, | |
| "value_loss_thought": 0.04431394169132545 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "grad_norm": 0.470284848811417, | |
| "learning_rate": 3.556370206919643e-06, | |
| "loss": 0.0412, | |
| "sft_loss": 0.006972516793757677, | |
| "step": 1065, | |
| "total_loss": 0.007320523636008147, | |
| "value_loss": 0.0034800680246689806, | |
| "value_loss_search": 0.015306914580980902, | |
| "value_loss_thought": 0.012533629250856392 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "grad_norm": 0.696033260327911, | |
| "learning_rate": 3.5405541290976968e-06, | |
| "loss": 0.0411, | |
| "sft_loss": 0.004306224733591079, | |
| "step": 1070, | |
| "total_loss": 0.004650098075256892, | |
| "value_loss": 0.003438732988161064, | |
| "value_loss_search": 0.013183290196491271, | |
| "value_loss_thought": 0.014326573800536835 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "grad_norm": 0.8518349953519152, | |
| "learning_rate": 3.5246875159755554e-06, | |
| "loss": 0.0393, | |
| "sft_loss": 0.004596246278379112, | |
| "step": 1075, | |
| "total_loss": 0.005092797088298085, | |
| "value_loss": 0.004965507755696308, | |
| "value_loss_search": 0.013638158090543584, | |
| "value_loss_thought": 0.026085903684634103 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "grad_norm": 0.7830604366139895, | |
| "learning_rate": 3.5087711381273144e-06, | |
| "loss": 0.0483, | |
| "sft_loss": 0.004399802925763652, | |
| "step": 1080, | |
| "total_loss": 0.00474689214189965, | |
| "value_loss": 0.0034708920188450064, | |
| "value_loss_search": 0.014559524009473534, | |
| "value_loss_thought": 0.013207611872235247 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "grad_norm": 1.0245098727939808, | |
| "learning_rate": 3.49280576854393e-06, | |
| "loss": 0.0453, | |
| "sft_loss": 0.006848539051134139, | |
| "step": 1085, | |
| "total_loss": 0.007486119516067901, | |
| "value_loss": 0.0063758047096257545, | |
| "value_loss_search": 0.01603142107730946, | |
| "value_loss_thought": 0.034975016615135246 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "grad_norm": 0.6521674554950044, | |
| "learning_rate": 3.4767921825956824e-06, | |
| "loss": 0.0487, | |
| "sft_loss": 0.004285465716384352, | |
| "step": 1090, | |
| "total_loss": 0.004569660496787265, | |
| "value_loss": 0.0028419478604973848, | |
| "value_loss_search": 0.008792001151141449, | |
| "value_loss_thought": 0.013943581694456952 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "grad_norm": 0.7150319458252575, | |
| "learning_rate": 3.4607311579945124e-06, | |
| "loss": 0.0506, | |
| "sft_loss": 0.006909280724357814, | |
| "step": 1095, | |
| "total_loss": 0.007147605080501762, | |
| "value_loss": 0.0023832433014376875, | |
| "value_loss_search": 0.010291470044728612, | |
| "value_loss_thought": 0.008774476365078953 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "grad_norm": 0.9363000353600848, | |
| "learning_rate": 3.444623474756258e-06, | |
| "loss": 0.0527, | |
| "sft_loss": 0.003712919045938179, | |
| "step": 1100, | |
| "total_loss": 0.004117744177119675, | |
| "value_loss": 0.004048251279891701, | |
| "value_loss_search": 0.012587877828525506, | |
| "value_loss_thought": 0.01979813240959629 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "grad_norm": 0.5759000271014428, | |
| "learning_rate": 3.4284699151627672e-06, | |
| "loss": 0.0463, | |
| "sft_loss": 0.005106915923533961, | |
| "step": 1105, | |
| "total_loss": 0.0054412948647041045, | |
| "value_loss": 0.0033437895152928832, | |
| "value_loss_search": 0.01137218730814311, | |
| "value_loss_thought": 0.015378128899897092 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "grad_norm": 0.7357711398285077, | |
| "learning_rate": 3.412271263723909e-06, | |
| "loss": 0.0434, | |
| "sft_loss": 0.005000182124786079, | |
| "step": 1110, | |
| "total_loss": 0.005477192047459311, | |
| "value_loss": 0.004770098890833197, | |
| "value_loss_search": 0.012354543731896683, | |
| "value_loss_thought": 0.025806247426635308 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "grad_norm": 0.8938685745921593, | |
| "learning_rate": 3.3960283071394717e-06, | |
| "loss": 0.0469, | |
| "sft_loss": 0.0063235011184588075, | |
| "step": 1115, | |
| "total_loss": 0.006910537980154174, | |
| "value_loss": 0.005870367820580214, | |
| "value_loss_search": 0.014325453480932993, | |
| "value_loss_thought": 0.032637489220542194 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "grad_norm": 0.6909317413670801, | |
| "learning_rate": 3.3797418342609577e-06, | |
| "loss": 0.047, | |
| "sft_loss": 0.004918072844156995, | |
| "step": 1120, | |
| "total_loss": 0.0052730022313710375, | |
| "value_loss": 0.0035492941345751207, | |
| "value_loss_search": 0.010046540721191377, | |
| "value_loss_thought": 0.018347811973399075 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "grad_norm": 0.5732090332805206, | |
| "learning_rate": 3.3634126360532694e-06, | |
| "loss": 0.0468, | |
| "sft_loss": 0.005004867579555139, | |
| "step": 1125, | |
| "total_loss": 0.005580213024023806, | |
| "value_loss": 0.005753454113050793, | |
| "value_loss_search": 0.0135478620575046, | |
| "value_loss_thought": 0.03247977066948806 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "grad_norm": 0.5934033812484811, | |
| "learning_rate": 3.347041505556298e-06, | |
| "loss": 0.0463, | |
| "sft_loss": 0.005900320567889139, | |
| "step": 1130, | |
| "total_loss": 0.006214558424846928, | |
| "value_loss": 0.0031423781176272312, | |
| "value_loss_search": 0.012277730915150187, | |
| "value_loss_thought": 0.01286129405855263 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "grad_norm": 0.6696886862461858, | |
| "learning_rate": 3.3306292378464083e-06, | |
| "loss": 0.0508, | |
| "sft_loss": 0.007460485817864538, | |
| "step": 1135, | |
| "total_loss": 0.007751618086540191, | |
| "value_loss": 0.0029113226871686493, | |
| "value_loss_search": 0.01191347677657859, | |
| "value_loss_thought": 0.011377104656867231 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "grad_norm": 0.7790037685619419, | |
| "learning_rate": 3.314176629997825e-06, | |
| "loss": 0.0452, | |
| "sft_loss": 0.004827470483724028, | |
| "step": 1140, | |
| "total_loss": 0.005163270902585282, | |
| "value_loss": 0.003358004500989864, | |
| "value_loss_search": 0.013757221815399134, | |
| "value_loss_thought": 0.013106814269212919 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "grad_norm": 0.5505345044218094, | |
| "learning_rate": 3.297684481043922e-06, | |
| "loss": 0.0469, | |
| "sft_loss": 0.0062236432102508845, | |
| "step": 1145, | |
| "total_loss": 0.006526504096120789, | |
| "value_loss": 0.003028608957868073, | |
| "value_loss_search": 0.012845393837199025, | |
| "value_loss_thought": 0.011383477907293127 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "grad_norm": 0.46343408162242566, | |
| "learning_rate": 3.281153591938418e-06, | |
| "loss": 0.0433, | |
| "sft_loss": 0.005084162973798811, | |
| "step": 1150, | |
| "total_loss": 0.005412001899304642, | |
| "value_loss": 0.003278389718082053, | |
| "value_loss_search": 0.012043231101108632, | |
| "value_loss_thought": 0.01418388647671236 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "grad_norm": 0.5728946583207882, | |
| "learning_rate": 3.264584765516474e-06, | |
| "loss": 0.0513, | |
| "sft_loss": 0.008054213871946558, | |
| "step": 1155, | |
| "total_loss": 0.008398819074250241, | |
| "value_loss": 0.0034460521280379906, | |
| "value_loss_search": 0.013998744965681454, | |
| "value_loss_thought": 0.01356967230240116 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "grad_norm": 0.5867841886877614, | |
| "learning_rate": 3.2479788064557084e-06, | |
| "loss": 0.0424, | |
| "sft_loss": 0.004617373802466318, | |
| "step": 1160, | |
| "total_loss": 0.004921634896254546, | |
| "value_loss": 0.003042611040564225, | |
| "value_loss_search": 0.013083630732762686, | |
| "value_loss_thought": 0.01125725753390725 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "grad_norm": 0.5749765325052467, | |
| "learning_rate": 3.231336521237113e-06, | |
| "loss": 0.0425, | |
| "sft_loss": 0.005324905528686941, | |
| "step": 1165, | |
| "total_loss": 0.0056936069204539305, | |
| "value_loss": 0.003687014164006541, | |
| "value_loss_search": 0.01334903096231983, | |
| "value_loss_thought": 0.016147082374664022 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "grad_norm": 0.5228964319711119, | |
| "learning_rate": 3.2146587181058858e-06, | |
| "loss": 0.049, | |
| "sft_loss": 0.003777366707799956, | |
| "step": 1170, | |
| "total_loss": 0.004077984397645196, | |
| "value_loss": 0.0030061770619795427, | |
| "value_loss_search": 0.011763368438232646, | |
| "value_loss_thought": 0.012286048040141394 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "grad_norm": 0.40592168178993326, | |
| "learning_rate": 3.1979462070321817e-06, | |
| "loss": 0.0374, | |
| "sft_loss": 0.005217826526495628, | |
| "step": 1175, | |
| "total_loss": 0.005588610990344023, | |
| "value_loss": 0.003707844631036039, | |
| "value_loss_search": 0.01431789886352135, | |
| "value_loss_thought": 0.015344858078242396 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "grad_norm": 0.5057051615052391, | |
| "learning_rate": 3.1811997996717716e-06, | |
| "loss": 0.0303, | |
| "sft_loss": 0.0033945336355827747, | |
| "step": 1180, | |
| "total_loss": 0.0038138232659576943, | |
| "value_loss": 0.004192896059157647, | |
| "value_loss_search": 0.006568620411331949, | |
| "value_loss_thought": 0.02697454801736967 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "grad_norm": 0.751983057399965, | |
| "learning_rate": 3.1644203093266257e-06, | |
| "loss": 0.0311, | |
| "sft_loss": 0.0022496749937999994, | |
| "step": 1185, | |
| "total_loss": 0.0026717950843249128, | |
| "value_loss": 0.004221200832080285, | |
| "value_loss_search": 0.008355579411721692, | |
| "value_loss_thought": 0.025414026997623296 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "grad_norm": 0.48042996735947746, | |
| "learning_rate": 3.147608550905415e-06, | |
| "loss": 0.0303, | |
| "sft_loss": 0.0035940095724072306, | |
| "step": 1190, | |
| "total_loss": 0.0038380636684223644, | |
| "value_loss": 0.002440540775091904, | |
| "value_loss_search": 0.008547667160826222, | |
| "value_loss_thought": 0.01097665907091141 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "grad_norm": 0.38813805754778213, | |
| "learning_rate": 3.1307653408839316e-06, | |
| "loss": 0.0314, | |
| "sft_loss": 0.0033699018502375113, | |
| "step": 1195, | |
| "total_loss": 0.003613495991919535, | |
| "value_loss": 0.002435941319390622, | |
| "value_loss_search": 0.009487671555439193, | |
| "value_loss_thought": 0.009999858911305637 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "grad_norm": 0.500612598170884, | |
| "learning_rate": 3.1138914972654423e-06, | |
| "loss": 0.0328, | |
| "sft_loss": 0.0036761065653990953, | |
| "step": 1200, | |
| "total_loss": 0.00389666182005044, | |
| "value_loss": 0.0022055522495065816, | |
| "value_loss_search": 0.010129135770739595, | |
| "value_loss_thought": 0.0075152823273128885 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "grad_norm": 0.6490281681098725, | |
| "learning_rate": 3.0969878395409536e-06, | |
| "loss": 0.0253, | |
| "sft_loss": 0.003135368030052632, | |
| "step": 1205, | |
| "total_loss": 0.0033622780613200122, | |
| "value_loss": 0.002269100064461327, | |
| "value_loss_search": 0.009437274075094138, | |
| "value_loss_thought": 0.00871552659527879 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "grad_norm": 0.4935810834710449, | |
| "learning_rate": 3.08005518864942e-06, | |
| "loss": 0.0306, | |
| "sft_loss": 0.0035176657198462634, | |
| "step": 1210, | |
| "total_loss": 0.0037368611569259967, | |
| "value_loss": 0.0021919542997125064, | |
| "value_loss_search": 0.008411810171332945, | |
| "value_loss_thought": 0.009123824179141593 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "grad_norm": 0.5321452984531243, | |
| "learning_rate": 3.06309436693787e-06, | |
| "loss": 0.0298, | |
| "sft_loss": 0.0036181122821290048, | |
| "step": 1215, | |
| "total_loss": 0.003920620708220213, | |
| "value_loss": 0.0030250843786689074, | |
| "value_loss_search": 0.012618535867022728, | |
| "value_loss_thought": 0.011582138900485007 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "grad_norm": 0.5041384981332757, | |
| "learning_rate": 3.0461061981214685e-06, | |
| "loss": 0.029, | |
| "sft_loss": 0.0037700470944400876, | |
| "step": 1220, | |
| "total_loss": 0.003952616032614742, | |
| "value_loss": 0.001825689259169394, | |
| "value_loss_search": 0.007131004981476963, | |
| "value_loss_thought": 0.0074745089516000006 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "grad_norm": 0.825130289977903, | |
| "learning_rate": 3.029091507243514e-06, | |
| "loss": 0.0337, | |
| "sft_loss": 0.00417679272359237, | |
| "step": 1225, | |
| "total_loss": 0.004465274805897934, | |
| "value_loss": 0.0028848204653513674, | |
| "value_loss_search": 0.012175960628178472, | |
| "value_loss_thought": 0.010902603188787907 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "grad_norm": 0.48612582380031194, | |
| "learning_rate": 3.0120511206353692e-06, | |
| "loss": 0.0306, | |
| "sft_loss": 0.0037190442701103164, | |
| "step": 1230, | |
| "total_loss": 0.003983076896520288, | |
| "value_loss": 0.0026403263425891057, | |
| "value_loss_search": 0.009736456504344914, | |
| "value_loss_thought": 0.011386154282945427 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "grad_norm": 0.49557711764797474, | |
| "learning_rate": 2.9949858658763297e-06, | |
| "loss": 0.0293, | |
| "sft_loss": 0.0033108420844655483, | |
| "step": 1235, | |
| "total_loss": 0.003569280348868631, | |
| "value_loss": 0.002584382791928874, | |
| "value_loss_search": 0.009431568763068299, | |
| "value_loss_thought": 0.011243493664233028 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "grad_norm": 0.5415430915788131, | |
| "learning_rate": 2.9778965717534314e-06, | |
| "loss": 0.0323, | |
| "sft_loss": 0.0030432559084147214, | |
| "step": 1240, | |
| "total_loss": 0.003313265562081824, | |
| "value_loss": 0.002700096501041571, | |
| "value_loss_search": 0.006881270110670812, | |
| "value_loss_thought": 0.014719502057914724 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "grad_norm": 0.708236640225555, | |
| "learning_rate": 2.9607840682211987e-06, | |
| "loss": 0.0315, | |
| "sft_loss": 0.003214700281387195, | |
| "step": 1245, | |
| "total_loss": 0.003709355751084331, | |
| "value_loss": 0.004946554816625337, | |
| "value_loss_search": 0.006554803127994546, | |
| "value_loss_thought": 0.03301763468744454 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "grad_norm": 0.5437059054594167, | |
| "learning_rate": 2.9436491863613404e-06, | |
| "loss": 0.0316, | |
| "sft_loss": 0.0036181325966026636, | |
| "step": 1250, | |
| "total_loss": 0.003912387714774468, | |
| "value_loss": 0.0029425512826605884, | |
| "value_loss_search": 0.01317626194404511, | |
| "value_loss_thought": 0.01036414824043277 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "grad_norm": 0.6432414654067561, | |
| "learning_rate": 2.9264927583423847e-06, | |
| "loss": 0.0306, | |
| "sft_loss": 0.003098224982386455, | |
| "step": 1255, | |
| "total_loss": 0.003732235044594745, | |
| "value_loss": 0.006340100448505836, | |
| "value_loss_search": 0.010029617098223299, | |
| "value_loss_thought": 0.040691186868843945 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "grad_norm": 0.7784154218893079, | |
| "learning_rate": 2.9093156173792675e-06, | |
| "loss": 0.0329, | |
| "sft_loss": 0.003491103381384164, | |
| "step": 1260, | |
| "total_loss": 0.003718133881397989, | |
| "value_loss": 0.002270305072852352, | |
| "value_loss_search": 0.010139237881389818, | |
| "value_loss_thought": 0.008023202697256693 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "grad_norm": 0.5051189878209226, | |
| "learning_rate": 2.8921185976928613e-06, | |
| "loss": 0.0299, | |
| "sft_loss": 0.0037886684003751725, | |
| "step": 1265, | |
| "total_loss": 0.00400653738573169, | |
| "value_loss": 0.0021786899614198775, | |
| "value_loss_search": 0.00914538588888263, | |
| "value_loss_thought": 0.008284133870392906 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "grad_norm": 0.6290655324172468, | |
| "learning_rate": 2.8749025344694653e-06, | |
| "loss": 0.0336, | |
| "sft_loss": 0.004462181986309588, | |
| "step": 1270, | |
| "total_loss": 0.004616037011123808, | |
| "value_loss": 0.00153855009958761, | |
| "value_loss_search": 0.005969772761523018, | |
| "value_loss_thought": 0.006338628097728361 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "grad_norm": 0.5333504288117544, | |
| "learning_rate": 2.857668263820244e-06, | |
| "loss": 0.0303, | |
| "sft_loss": 0.003401619120268151, | |
| "step": 1275, | |
| "total_loss": 0.003593827542800909, | |
| "value_loss": 0.0019220843075743233, | |
| "value_loss_search": 0.008753876088519519, | |
| "value_loss_thought": 0.006622798505304672 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "grad_norm": 0.4126781957882197, | |
| "learning_rate": 2.840416622740617e-06, | |
| "loss": 0.0295, | |
| "sft_loss": 0.004203358304221183, | |
| "step": 1280, | |
| "total_loss": 0.0044454284535731855, | |
| "value_loss": 0.0024207017429930034, | |
| "value_loss_search": 0.00895141239620898, | |
| "value_loss_thought": 0.010414201496178065 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "grad_norm": 0.636868430333384, | |
| "learning_rate": 2.823148449069613e-06, | |
| "loss": 0.0317, | |
| "sft_loss": 0.0037902468640822915, | |
| "step": 1285, | |
| "total_loss": 0.004080002412759143, | |
| "value_loss": 0.002897555428683063, | |
| "value_loss_search": 0.010850950920882951, | |
| "value_loss_thought": 0.012329492640174067 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "grad_norm": 0.49524669578109337, | |
| "learning_rate": 2.8058645814491784e-06, | |
| "loss": 0.0312, | |
| "sft_loss": 0.004466524376766756, | |
| "step": 1290, | |
| "total_loss": 0.00469799009814551, | |
| "value_loss": 0.002314657226224881, | |
| "value_loss_search": 0.006950648547240234, | |
| "value_loss_thought": 0.011566609143937968 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "grad_norm": 0.4873869632029965, | |
| "learning_rate": 2.7885658592834488e-06, | |
| "loss": 0.032, | |
| "sft_loss": 0.004330064181704074, | |
| "step": 1295, | |
| "total_loss": 0.0045417543143230436, | |
| "value_loss": 0.0021169017202510077, | |
| "value_loss_search": 0.00826698833628825, | |
| "value_loss_thought": 0.00866822535913343 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "grad_norm": 0.5038272789213597, | |
| "learning_rate": 2.771253122697981e-06, | |
| "loss": 0.0331, | |
| "sft_loss": 0.0041820299404207615, | |
| "step": 1300, | |
| "total_loss": 0.004449167268626297, | |
| "value_loss": 0.002671373126996457, | |
| "value_loss_search": 0.008917606517729836, | |
| "value_loss_thought": 0.012453378505870204 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "grad_norm": 0.5716128545581782, | |
| "learning_rate": 2.7539272124989545e-06, | |
| "loss": 0.0327, | |
| "sft_loss": 0.0034668007108848544, | |
| "step": 1305, | |
| "total_loss": 0.0037846647005551405, | |
| "value_loss": 0.0031786399593784153, | |
| "value_loss_search": 0.012202327424117244, | |
| "value_loss_thought": 0.013226792128807573 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "grad_norm": 0.7312620161870373, | |
| "learning_rate": 2.736588970132333e-06, | |
| "loss": 0.032, | |
| "sft_loss": 0.0036935678450390696, | |
| "step": 1310, | |
| "total_loss": 0.003966284790448071, | |
| "value_loss": 0.0027271693567854525, | |
| "value_loss_search": 0.011858981241562105, | |
| "value_loss_thought": 0.00995837363161627 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "grad_norm": 0.48258120287180617, | |
| "learning_rate": 2.7192392376430014e-06, | |
| "loss": 0.0313, | |
| "sft_loss": 0.003437778353691101, | |
| "step": 1315, | |
| "total_loss": 0.0036498856757901875, | |
| "value_loss": 0.0021210731328892506, | |
| "value_loss_search": 0.008885550579176994, | |
| "value_loss_thought": 0.008083034464357297 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "grad_norm": 0.6553771008550281, | |
| "learning_rate": 2.701878857633874e-06, | |
| "loss": 0.0328, | |
| "sft_loss": 0.002638998458860442, | |
| "step": 1320, | |
| "total_loss": 0.002854476947356943, | |
| "value_loss": 0.0021547851526747762, | |
| "value_loss_search": 0.006954838147055398, | |
| "value_loss_thought": 0.010283443070898101 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "grad_norm": 0.5831125961756038, | |
| "learning_rate": 2.684508673224967e-06, | |
| "loss": 0.0348, | |
| "sft_loss": 0.004423308192053809, | |
| "step": 1325, | |
| "total_loss": 0.004638882860058402, | |
| "value_loss": 0.002155746738399955, | |
| "value_loss_search": 0.009081033444783771, | |
| "value_loss_thought": 0.008164940534516062 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "grad_norm": 0.6554790409450288, | |
| "learning_rate": 2.6671295280124567e-06, | |
| "loss": 0.0322, | |
| "sft_loss": 0.003197679913137108, | |
| "step": 1330, | |
| "total_loss": 0.0033951037816166265, | |
| "value_loss": 0.001974238623790825, | |
| "value_loss_search": 0.007716037955879074, | |
| "value_loss_thought": 0.008077871069599497 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "grad_norm": 0.6464974512156216, | |
| "learning_rate": 2.649742266027705e-06, | |
| "loss": 0.0309, | |
| "sft_loss": 0.0025466441409662368, | |
| "step": 1335, | |
| "total_loss": 0.002726721732233273, | |
| "value_loss": 0.001800775762740159, | |
| "value_loss_search": 0.006972815421841005, | |
| "value_loss_thought": 0.007433390758114911 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.6803735749099431, | |
| "learning_rate": 2.632347731696274e-06, | |
| "loss": 0.033, | |
| "sft_loss": 0.003279316209955141, | |
| "step": 1340, | |
| "total_loss": 0.0035762524097776804, | |
| "value_loss": 0.002969362228782302, | |
| "value_loss_search": 0.008045078085967817, | |
| "value_loss_thought": 0.0157098194164746 | |
| }, | |
| { | |
| "epoch": 8.03, | |
| "grad_norm": 0.39085071585625647, | |
| "learning_rate": 2.6149467697969118e-06, | |
| "loss": 0.0225, | |
| "sft_loss": 0.002843447361374274, | |
| "step": 1345, | |
| "total_loss": 0.002981356151798309, | |
| "value_loss": 0.001379087858413186, | |
| "value_loss_search": 0.004844959436161389, | |
| "value_loss_thought": 0.006187743457485339 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "grad_norm": 0.48063989373141985, | |
| "learning_rate": 2.597540225420525e-06, | |
| "loss": 0.0226, | |
| "sft_loss": 0.002574404375627637, | |
| "step": 1350, | |
| "total_loss": 0.0028025899320653024, | |
| "value_loss": 0.0022818553584329493, | |
| "value_loss_search": 0.005695947931963019, | |
| "value_loss_thought": 0.012558894986273116 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "grad_norm": 0.4206131958945122, | |
| "learning_rate": 2.580128943929139e-06, | |
| "loss": 0.0212, | |
| "sft_loss": 0.002781625863281079, | |
| "step": 1355, | |
| "total_loss": 0.002963343672460894, | |
| "value_loss": 0.0018171783105231042, | |
| "value_loss_search": 0.008560673631276928, | |
| "value_loss_thought": 0.005976753031427506 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "grad_norm": 0.3653474778372023, | |
| "learning_rate": 2.5627137709148386e-06, | |
| "loss": 0.0225, | |
| "sft_loss": 0.0017891598748974503, | |
| "step": 1360, | |
| "total_loss": 0.001957193014266068, | |
| "value_loss": 0.0016803315540641962, | |
| "value_loss_search": 0.006167404261202591, | |
| "value_loss_thought": 0.007275248032328818 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "grad_norm": 0.43951845600689593, | |
| "learning_rate": 2.5452955521587064e-06, | |
| "loss": 0.0225, | |
| "sft_loss": 0.0016710011375835165, | |
| "step": 1365, | |
| "total_loss": 0.0018185687295726894, | |
| "value_loss": 0.0014756758409930626, | |
| "value_loss_search": 0.006119606431695956, | |
| "value_loss_thought": 0.005685800284209108 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "grad_norm": 0.45317276348714697, | |
| "learning_rate": 2.5278751335897423e-06, | |
| "loss": 0.0238, | |
| "sft_loss": 0.0029196401010267437, | |
| "step": 1370, | |
| "total_loss": 0.0030648296158460654, | |
| "value_loss": 0.0014518950408728415, | |
| "value_loss_search": 0.0060932497550993505, | |
| "value_loss_thought": 0.005521910469178692 | |
| }, | |
| { | |
| "epoch": 8.21, | |
| "grad_norm": 0.3857977796232688, | |
| "learning_rate": 2.5104533612437816e-06, | |
| "loss": 0.0203, | |
| "sft_loss": 0.002657680620905012, | |
| "step": 1375, | |
| "total_loss": 0.0027829522318313592, | |
| "value_loss": 0.00125271600504675, | |
| "value_loss_search": 0.004111475246963892, | |
| "value_loss_thought": 0.005910252820331152 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "grad_norm": 0.43983223503478724, | |
| "learning_rate": 2.493031081222406e-06, | |
| "loss": 0.0214, | |
| "sft_loss": 0.002025950566167012, | |
| "step": 1380, | |
| "total_loss": 0.0021369275005781673, | |
| "value_loss": 0.0011097693309352508, | |
| "value_loss_search": 0.003321273262153568, | |
| "value_loss_thought": 0.005556881445545514 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "grad_norm": 0.3626207865464384, | |
| "learning_rate": 2.475609139651855e-06, | |
| "loss": 0.0208, | |
| "sft_loss": 0.002262994254124351, | |
| "step": 1385, | |
| "total_loss": 0.0023927846590680703, | |
| "value_loss": 0.0012979039850506524, | |
| "value_loss_search": 0.005410684119317466, | |
| "value_loss_thought": 0.004972547819033934 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "grad_norm": 0.5204196278274228, | |
| "learning_rate": 2.4581883826419294e-06, | |
| "loss": 0.0238, | |
| "sft_loss": 0.0023898789659142494, | |
| "step": 1390, | |
| "total_loss": 0.0025361195974028306, | |
| "value_loss": 0.0014624062704569952, | |
| "value_loss_search": 0.0058827654516449, | |
| "value_loss_thought": 0.005816484717388448 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "grad_norm": 0.45142759026857865, | |
| "learning_rate": 2.4407696562449006e-06, | |
| "loss": 0.0209, | |
| "sft_loss": 0.0020229626476066186, | |
| "step": 1395, | |
| "total_loss": 0.0021993215879206216, | |
| "value_loss": 0.0017635896525462157, | |
| "value_loss_search": 0.006506465665435712, | |
| "value_loss_thought": 0.007602251682465066 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "grad_norm": 0.6201398131026475, | |
| "learning_rate": 2.4233538064144226e-06, | |
| "loss": 0.0214, | |
| "sft_loss": 0.002320754388347268, | |
| "step": 1400, | |
| "total_loss": 0.0024575029708927333, | |
| "value_loss": 0.0013674858636591124, | |
| "value_loss_search": 0.004308351113706976, | |
| "value_loss_thought": 0.006631535803444421 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "grad_norm": 0.45220431536451633, | |
| "learning_rate": 2.4059416789644473e-06, | |
| "loss": 0.0224, | |
| "sft_loss": 0.0025798780581681056, | |
| "step": 1405, | |
| "total_loss": 0.00268079744263332, | |
| "value_loss": 0.0010091937743709422, | |
| "value_loss_search": 0.004291023600296739, | |
| "value_loss_thought": 0.0037825266010713676 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "grad_norm": 0.45477594831619567, | |
| "learning_rate": 2.388534119528145e-06, | |
| "loss": 0.0195, | |
| "sft_loss": 0.001965572632616386, | |
| "step": 1410, | |
| "total_loss": 0.0021669458707378906, | |
| "value_loss": 0.002013732181683281, | |
| "value_loss_search": 0.006261610782757998, | |
| "value_loss_thought": 0.009848246640262914 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "grad_norm": 0.4218999322836366, | |
| "learning_rate": 2.3711319735168378e-06, | |
| "loss": 0.0219, | |
| "sft_loss": 0.002767064847284928, | |
| "step": 1415, | |
| "total_loss": 0.0029677543869524926, | |
| "value_loss": 0.0020068954641317303, | |
| "value_loss_search": 0.006902593411530234, | |
| "value_loss_thought": 0.00915257024798848 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "grad_norm": 0.39469190101455454, | |
| "learning_rate": 2.353736086078941e-06, | |
| "loss": 0.0219, | |
| "sft_loss": 0.00205565721844323, | |
| "step": 1420, | |
| "total_loss": 0.0023155218750730453, | |
| "value_loss": 0.0025986466305312206, | |
| "value_loss_search": 0.009201484014715789, | |
| "value_loss_thought": 0.011587689043972204 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "grad_norm": 0.3222692938718468, | |
| "learning_rate": 2.336347302058916e-06, | |
| "loss": 0.0231, | |
| "sft_loss": 0.0034793566446751356, | |
| "step": 1425, | |
| "total_loss": 0.00360437715615376, | |
| "value_loss": 0.0012502055355525954, | |
| "value_loss_search": 0.004974942305875629, | |
| "value_loss_thought": 0.005026701947775792 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "grad_norm": 0.4599366779305295, | |
| "learning_rate": 2.3189664659562442e-06, | |
| "loss": 0.024, | |
| "sft_loss": 0.002877801636350341, | |
| "step": 1430, | |
| "total_loss": 0.003031013450221565, | |
| "value_loss": 0.0015321182537206823, | |
| "value_loss_search": 0.00576030847768152, | |
| "value_loss_thought": 0.006496637430245755 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "grad_norm": 0.4315667432642988, | |
| "learning_rate": 2.3015944218844063e-06, | |
| "loss": 0.022, | |
| "sft_loss": 0.002819139277562499, | |
| "step": 1435, | |
| "total_loss": 0.002962993244301515, | |
| "value_loss": 0.0014385397402392642, | |
| "value_loss_search": 0.005644433948407368, | |
| "value_loss_thought": 0.005863883913934842 | |
| }, | |
| { | |
| "epoch": 8.59, | |
| "grad_norm": 0.4745674858393937, | |
| "learning_rate": 2.2842320135298946e-06, | |
| "loss": 0.0229, | |
| "sft_loss": 0.002344584878301248, | |
| "step": 1440, | |
| "total_loss": 0.0024817303616316622, | |
| "value_loss": 0.0013714549205573689, | |
| "value_loss_search": 0.006260404985391687, | |
| "value_loss_thought": 0.004711234440060252 | |
| }, | |
| { | |
| "epoch": 8.62, | |
| "grad_norm": 0.40694918242518996, | |
| "learning_rate": 2.2668800841112345e-06, | |
| "loss": 0.0229, | |
| "sft_loss": 0.00296173918468412, | |
| "step": 1445, | |
| "total_loss": 0.0030806214503627414, | |
| "value_loss": 0.0011888226746123109, | |
| "value_loss_search": 0.004837690460476551, | |
| "value_loss_thought": 0.004672890894835291 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "grad_norm": 0.4107733649493513, | |
| "learning_rate": 2.2495394763380338e-06, | |
| "loss": 0.0225, | |
| "sft_loss": 0.003584610787220299, | |
| "step": 1450, | |
| "total_loss": 0.0038719090720064743, | |
| "value_loss": 0.0028729828365271714, | |
| "value_loss_search": 0.005230140845671372, | |
| "value_loss_thought": 0.017753721810265688 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "grad_norm": 0.3728305802169862, | |
| "learning_rate": 2.232211032370057e-06, | |
| "loss": 0.0227, | |
| "sft_loss": 0.0025901119457557797, | |
| "step": 1455, | |
| "total_loss": 0.0027724159214699284, | |
| "value_loss": 0.0018230398026389595, | |
| "value_loss_search": 0.006323750824310537, | |
| "value_loss_thought": 0.008260567580418866 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "grad_norm": 0.43244392951704247, | |
| "learning_rate": 2.2148955937763215e-06, | |
| "loss": 0.0202, | |
| "sft_loss": 0.0023802727228030562, | |
| "step": 1460, | |
| "total_loss": 0.0025262993830239113, | |
| "value_loss": 0.0014602663856294385, | |
| "value_loss_search": 0.004974742010833211, | |
| "value_loss_thought": 0.006707389143184628 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "grad_norm": 0.446983159592719, | |
| "learning_rate": 2.197594001494232e-06, | |
| "loss": 0.0231, | |
| "sft_loss": 0.00247747907997109, | |
| "step": 1465, | |
| "total_loss": 0.0028393455249869247, | |
| "value_loss": 0.0036186647702152186, | |
| "value_loss_search": 0.005167914255184769, | |
| "value_loss_thought": 0.02378140405708109 | |
| }, | |
| { | |
| "epoch": 8.77, | |
| "grad_norm": 0.582681357981295, | |
| "learning_rate": 2.1803070957887348e-06, | |
| "loss": 0.0232, | |
| "sft_loss": 0.0029129542876034976, | |
| "step": 1470, | |
| "total_loss": 0.0030655652646260022, | |
| "value_loss": 0.0015261098120788574, | |
| "value_loss_search": 0.006358572702845322, | |
| "value_loss_thought": 0.005850305858075444 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "grad_norm": 0.4700551072376061, | |
| "learning_rate": 2.1630357162115133e-06, | |
| "loss": 0.0219, | |
| "sft_loss": 0.002174633409595117, | |
| "step": 1475, | |
| "total_loss": 0.002347504053091143, | |
| "value_loss": 0.0017287064572656164, | |
| "value_loss_search": 0.008121828264177112, | |
| "value_loss_thought": 0.005707823473858298 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "grad_norm": 0.7663496628024437, | |
| "learning_rate": 2.1457807015602086e-06, | |
| "loss": 0.0234, | |
| "sft_loss": 0.0025546713673975318, | |
| "step": 1480, | |
| "total_loss": 0.00283914315147058, | |
| "value_loss": 0.002844717770221905, | |
| "value_loss_search": 0.011075520714871345, | |
| "value_loss_thought": 0.011682221261048653 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "grad_norm": 0.3411989638023231, | |
| "learning_rate": 2.1285428898376907e-06, | |
| "loss": 0.0218, | |
| "sft_loss": 0.0021655169257428497, | |
| "step": 1485, | |
| "total_loss": 0.0023130710998373162, | |
| "value_loss": 0.0014755416389562015, | |
| "value_loss_search": 0.005575540512140265, | |
| "value_loss_thought": 0.006228792705860542 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "grad_norm": 0.4283075171896123, | |
| "learning_rate": 2.1113231182113557e-06, | |
| "loss": 0.0226, | |
| "sft_loss": 0.002557673762203194, | |
| "step": 1490, | |
| "total_loss": 0.002811015537105277, | |
| "value_loss": 0.002533417688255213, | |
| "value_loss_search": 0.006894818281846682, | |
| "value_loss_thought": 0.013372523540647307 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "grad_norm": 0.38025999762981544, | |
| "learning_rate": 2.0941222229724683e-06, | |
| "loss": 0.0195, | |
| "sft_loss": 0.002423516203998588, | |
| "step": 1495, | |
| "total_loss": 0.0026445118043511686, | |
| "value_loss": 0.002209956094793597, | |
| "value_loss_search": 0.007773246503208498, | |
| "value_loss_thought": 0.00990640217037111 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "grad_norm": 0.4107993684483595, | |
| "learning_rate": 2.076941039495545e-06, | |
| "loss": 0.023, | |
| "sft_loss": 0.002852113952394575, | |
| "step": 1500, | |
| "total_loss": 0.003029771816866855, | |
| "value_loss": 0.0017765785493793374, | |
| "value_loss_search": 0.006161500808036635, | |
| "value_loss_thought": 0.008051127563931004 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "grad_norm": 0.43206266155631295, | |
| "learning_rate": 2.05978040219779e-06, | |
| "loss": 0.0219, | |
| "sft_loss": 0.0023770652449456977, | |
| "step": 1505, | |
| "total_loss": 0.002536764156579352, | |
| "value_loss": 0.0015969893091551056, | |
| "value_loss_search": 0.0054944734949231135, | |
| "value_loss_thought": 0.007281441086252016 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "grad_norm": 0.374824172192575, | |
| "learning_rate": 2.0426411444985622e-06, | |
| "loss": 0.0212, | |
| "sft_loss": 0.0025530525454087183, | |
| "step": 1510, | |
| "total_loss": 0.0026753786481890527, | |
| "value_loss": 0.0012232610420596756, | |
| "value_loss_search": 0.004044807156776642, | |
| "value_loss_thought": 0.005741281189693836 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "grad_norm": 0.5103310551708304, | |
| "learning_rate": 2.0255240987789077e-06, | |
| "loss": 0.017, | |
| "sft_loss": 0.002143319571041502, | |
| "step": 1515, | |
| "total_loss": 0.0022470162215711296, | |
| "value_loss": 0.001036966439151854, | |
| "value_loss_search": 0.004559061944007681, | |
| "value_loss_thought": 0.0037366695483342484 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "grad_norm": 0.41407771047339026, | |
| "learning_rate": 2.008430096341129e-06, | |
| "loss": 0.0165, | |
| "sft_loss": 0.00211839419498574, | |
| "step": 1520, | |
| "total_loss": 0.002260489938532828, | |
| "value_loss": 0.0014209576240091337, | |
| "value_loss_search": 0.006807764833172314, | |
| "value_loss_thought": 0.004559896182763623 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "grad_norm": 0.3998377123999996, | |
| "learning_rate": 1.991359967368416e-06, | |
| "loss": 0.0167, | |
| "sft_loss": 0.0017885153938550502, | |
| "step": 1525, | |
| "total_loss": 0.0019043610838139103, | |
| "value_loss": 0.001158456964958532, | |
| "value_loss_search": 0.004520953930921223, | |
| "value_loss_thought": 0.004746701816475252 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "grad_norm": 0.25993528776663594, | |
| "learning_rate": 1.974314540884522e-06, | |
| "loss": 0.0168, | |
| "sft_loss": 0.0022823128500021996, | |
| "step": 1530, | |
| "total_loss": 0.0023883844661270357, | |
| "value_loss": 0.0010607163117128948, | |
| "value_loss_search": 0.003205415800539413, | |
| "value_loss_thought": 0.0052803147056920356 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "grad_norm": 0.28607830106364285, | |
| "learning_rate": 1.9572946447135087e-06, | |
| "loss": 0.017, | |
| "sft_loss": 0.0020253795781172814, | |
| "step": 1535, | |
| "total_loss": 0.002180098254166296, | |
| "value_loss": 0.0015471866376628896, | |
| "value_loss_search": 0.005289423008980521, | |
| "value_loss_thought": 0.007088070128247637 | |
| }, | |
| { | |
| "epoch": 9.19, | |
| "grad_norm": 0.36665251321589426, | |
| "learning_rate": 1.9403011054395372e-06, | |
| "loss": 0.0176, | |
| "sft_loss": 0.0020406075345817953, | |
| "step": 1540, | |
| "total_loss": 0.00214596866593979, | |
| "value_loss": 0.0010536111770306888, | |
| "value_loss_search": 0.003708674301128667, | |
| "value_loss_thought": 0.004720214986241445 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "grad_norm": 0.33701179906071765, | |
| "learning_rate": 1.923334748366727e-06, | |
| "loss": 0.0164, | |
| "sft_loss": 0.0018277755152666941, | |
| "step": 1545, | |
| "total_loss": 0.001965286196420379, | |
| "value_loss": 0.0013751067914199665, | |
| "value_loss_search": 0.0051922910111670715, | |
| "value_loss_thought": 0.005808563233449604 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "grad_norm": 0.30304308100827854, | |
| "learning_rate": 1.9063963974790715e-06, | |
| "loss": 0.0171, | |
| "sft_loss": 0.0021657033037627118, | |
| "step": 1550, | |
| "total_loss": 0.0022887821434451894, | |
| "value_loss": 0.0012307884190931873, | |
| "value_loss_search": 0.004165462106902851, | |
| "value_loss_thought": 0.005680845198685347 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "grad_norm": 0.348110330157348, | |
| "learning_rate": 1.8894868754004247e-06, | |
| "loss": 0.0168, | |
| "sft_loss": 0.002240948341204785, | |
| "step": 1555, | |
| "total_loss": 0.002330947991924148, | |
| "value_loss": 0.000899996584598739, | |
| "value_loss_search": 0.0027729876618877826, | |
| "value_loss_thought": 0.00442698502301937 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "grad_norm": 0.3510042973090616, | |
| "learning_rate": 1.8726070033545468e-06, | |
| "loss": 0.0176, | |
| "sft_loss": 0.0018620806687977165, | |
| "step": 1560, | |
| "total_loss": 0.0019822285716736944, | |
| "value_loss": 0.0012014789214504162, | |
| "value_loss_search": 0.0045417374156954795, | |
| "value_loss_thought": 0.005070093995891511 | |
| }, | |
| { | |
| "epoch": 9.34, | |
| "grad_norm": 0.2988369790384324, | |
| "learning_rate": 1.855757601125221e-06, | |
| "loss": 0.0168, | |
| "sft_loss": 0.0019012396631296724, | |
| "step": 1565, | |
| "total_loss": 0.0020716833577807845, | |
| "value_loss": 0.001704436970885581, | |
| "value_loss_search": 0.006682152269536346, | |
| "value_loss_thought": 0.006953343548593694 | |
| }, | |
| { | |
| "epoch": 9.37, | |
| "grad_norm": 0.4000228004615745, | |
| "learning_rate": 1.8389394870164418e-06, | |
| "loss": 0.0174, | |
| "sft_loss": 0.0017132473614765332, | |
| "step": 1570, | |
| "total_loss": 0.0018083037684050395, | |
| "value_loss": 0.0009505639430244627, | |
| "value_loss_search": 0.004880918659534927, | |
| "value_loss_thought": 0.0027235929035441587 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "grad_norm": 0.3335584079653667, | |
| "learning_rate": 1.8221534778126712e-06, | |
| "loss": 0.016, | |
| "sft_loss": 0.0019671204237965865, | |
| "step": 1575, | |
| "total_loss": 0.0020535163486698595, | |
| "value_loss": 0.0008639591912242394, | |
| "value_loss_search": 0.0034627173671083256, | |
| "value_loss_thought": 0.003448956180272944 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "grad_norm": 0.3026400110385097, | |
| "learning_rate": 1.8054003887391727e-06, | |
| "loss": 0.0166, | |
| "sft_loss": 0.002014622194110416, | |
| "step": 1580, | |
| "total_loss": 0.0021511696702731344, | |
| "value_loss": 0.001365474661497501, | |
| "value_loss_search": 0.004294166664863042, | |
| "value_loss_thought": 0.006629630598598624 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "grad_norm": 0.43139165457443246, | |
| "learning_rate": 1.7886810334224192e-06, | |
| "loss": 0.0163, | |
| "sft_loss": 0.0021359879698138683, | |
| "step": 1585, | |
| "total_loss": 0.0022444051660613696, | |
| "value_loss": 0.0010841718215033325, | |
| "value_loss_search": 0.004590211787228782, | |
| "value_loss_thought": 0.004083162726351475 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "grad_norm": 0.34009988215077, | |
| "learning_rate": 1.7719962238505779e-06, | |
| "loss": 0.0166, | |
| "sft_loss": 0.002009772404562682, | |
| "step": 1590, | |
| "total_loss": 0.0022419645182367278, | |
| "value_loss": 0.0023219212426283777, | |
| "value_loss_search": 0.0064497248539396425, | |
| "value_loss_thought": 0.01212564545467103 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "grad_norm": 0.29633516039709396, | |
| "learning_rate": 1.7553467703340755e-06, | |
| "loss": 0.017, | |
| "sft_loss": 0.0015611476090271025, | |
| "step": 1595, | |
| "total_loss": 0.0016733453244299312, | |
| "value_loss": 0.0011219771564128678, | |
| "value_loss_search": 0.0053791521318601095, | |
| "value_loss_thought": 0.0035966651016906327 | |
| }, | |
| { | |
| "epoch": 9.55, | |
| "grad_norm": 0.38962016205008826, | |
| "learning_rate": 1.7387334814662452e-06, | |
| "loss": 0.0168, | |
| "sft_loss": 0.002254475053632632, | |
| "step": 1600, | |
| "total_loss": 0.0023654911761866516, | |
| "value_loss": 0.0011101611622962083, | |
| "value_loss_search": 0.004612163749004594, | |
| "value_loss_thought": 0.004269125514247208 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "grad_norm": 0.38388365774795874, | |
| "learning_rate": 1.7221571640840562e-06, | |
| "loss": 0.0176, | |
| "sft_loss": 0.0018735320656560362, | |
| "step": 1605, | |
| "total_loss": 0.0019748406046375066, | |
| "value_loss": 0.0010130854097496922, | |
| "value_loss_search": 0.003405629392977971, | |
| "value_loss_thought": 0.004699053899685168 | |
| }, | |
| { | |
| "epoch": 9.61, | |
| "grad_norm": 0.4095939751375467, | |
| "learning_rate": 1.7056186232289298e-06, | |
| "loss": 0.0166, | |
| "sft_loss": 0.0022918267059139907, | |
| "step": 1610, | |
| "total_loss": 0.0024106179324689947, | |
| "value_loss": 0.001187912198918184, | |
| "value_loss_search": 0.004366835134328539, | |
| "value_loss_thought": 0.005136462485006632 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "grad_norm": 0.3302901304739707, | |
| "learning_rate": 1.6891186621076433e-06, | |
| "loss": 0.0186, | |
| "sft_loss": 0.002024157461710274, | |
| "step": 1615, | |
| "total_loss": 0.0021383855524845785, | |
| "value_loss": 0.001142280875455981, | |
| "value_loss_search": 0.004602078883806371, | |
| "value_loss_thought": 0.004536168186041323 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "grad_norm": 0.3679502879592282, | |
| "learning_rate": 1.6726580820533155e-06, | |
| "loss": 0.0159, | |
| "sft_loss": 0.0017695592978270724, | |
| "step": 1620, | |
| "total_loss": 0.0018712303529298425, | |
| "value_loss": 0.0010167105092577344, | |
| "value_loss_search": 0.003538572788136207, | |
| "value_loss_thought": 0.004595111271009955 | |
| }, | |
| { | |
| "epoch": 9.7, | |
| "grad_norm": 0.30533989281219753, | |
| "learning_rate": 1.6562376824864985e-06, | |
| "loss": 0.0166, | |
| "sft_loss": 0.0025343591201817616, | |
| "step": 1625, | |
| "total_loss": 0.002648697585539139, | |
| "value_loss": 0.0011433845557576206, | |
| "value_loss_search": 0.004449716299927786, | |
| "value_loss_thought": 0.004697360047975963 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "grad_norm": 0.2877518184615656, | |
| "learning_rate": 1.6398582608763457e-06, | |
| "loss": 0.0179, | |
| "sft_loss": 0.0024212473537772892, | |
| "step": 1630, | |
| "total_loss": 0.0025349426502771165, | |
| "value_loss": 0.0011369530704428144, | |
| "value_loss_search": 0.0051415300209441735, | |
| "value_loss_thought": 0.003954094471146164 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "grad_norm": 0.36027684134993015, | |
| "learning_rate": 1.6235206127018865e-06, | |
| "loss": 0.016, | |
| "sft_loss": 0.002206899574957788, | |
| "step": 1635, | |
| "total_loss": 0.0022886144271467403, | |
| "value_loss": 0.000817148587839256, | |
| "value_loss_search": 0.003036996565958816, | |
| "value_loss_thought": 0.003500192180490558 | |
| }, | |
| { | |
| "epoch": 9.79, | |
| "grad_norm": 0.35076644651015965, | |
| "learning_rate": 1.6072255314133921e-06, | |
| "loss": 0.0173, | |
| "sft_loss": 0.0019144602090818807, | |
| "step": 1640, | |
| "total_loss": 0.002069194418021425, | |
| "value_loss": 0.0015473420381795222, | |
| "value_loss_search": 0.005170887146491054, | |
| "value_loss_thought": 0.007207849150842094 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "grad_norm": 0.30979208746416814, | |
| "learning_rate": 1.5909738083938387e-06, | |
| "loss": 0.0181, | |
| "sft_loss": 0.0023195294284960254, | |
| "step": 1645, | |
| "total_loss": 0.002506226720114313, | |
| "value_loss": 0.0018669727418881622, | |
| "value_loss_search": 0.004914225066238486, | |
| "value_loss_thought": 0.010021556961669375 | |
| }, | |
| { | |
| "epoch": 9.85, | |
| "grad_norm": 0.3414912343606952, | |
| "learning_rate": 1.5747662329204758e-06, | |
| "loss": 0.0164, | |
| "sft_loss": 0.0017500042042229325, | |
| "step": 1650, | |
| "total_loss": 0.0018422375416491832, | |
| "value_loss": 0.0009223333461136462, | |
| "value_loss_search": 0.003468296695177742, | |
| "value_loss_thought": 0.003910370041808164 | |
| }, | |
| { | |
| "epoch": 9.88, | |
| "grad_norm": 0.34143690574927327, | |
| "learning_rate": 1.5586035921264952e-06, | |
| "loss": 0.0167, | |
| "sft_loss": 0.001885048404801637, | |
| "step": 1655, | |
| "total_loss": 0.0019954120705705236, | |
| "value_loss": 0.001103636745813219, | |
| "value_loss_search": 0.004034641608018319, | |
| "value_loss_thought": 0.004794452414535045 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "grad_norm": 0.3940780464860908, | |
| "learning_rate": 1.5424866709628018e-06, | |
| "loss": 0.0167, | |
| "sft_loss": 0.0023443336365744473, | |
| "step": 1660, | |
| "total_loss": 0.0025089123803297753, | |
| "value_loss": 0.001645787319603187, | |
| "value_loss_search": 0.005139281411049979, | |
| "value_loss_thought": 0.008027016961932532 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "grad_norm": 0.28513052875466016, | |
| "learning_rate": 1.5264162521598893e-06, | |
| "loss": 0.017, | |
| "sft_loss": 0.002210353355621919, | |
| "step": 1665, | |
| "total_loss": 0.0023033933971760233, | |
| "value_loss": 0.0009304003870511223, | |
| "value_loss_search": 0.004236812041176563, | |
| "value_loss_thought": 0.00320639110132106 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "grad_norm": 0.6554785763050915, | |
| "learning_rate": 1.5103931161898321e-06, | |
| "loss": 0.017, | |
| "sft_loss": 0.001787349657388404, | |
| "step": 1670, | |
| "total_loss": 0.0019071295019472245, | |
| "value_loss": 0.0011977983157066773, | |
| "value_loss_search": 0.003837722380649211, | |
| "value_loss_thought": 0.0057446641365231695 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.47343410940009084, | |
| "learning_rate": 1.4944180412283765e-06, | |
| "loss": 0.0173, | |
| "sft_loss": 0.0018310324900085106, | |
| "step": 1675, | |
| "total_loss": 0.001928851098625728, | |
| "value_loss": 0.0009781861556859895, | |
| "value_loss_search": 0.00370205105889454, | |
| "value_loss_thought": 0.004123438105875721 | |
| }, | |
| { | |
| "epoch": 10.03, | |
| "grad_norm": 0.3191231605103231, | |
| "learning_rate": 1.4784918031171507e-06, | |
| "loss": 0.0138, | |
| "sft_loss": 0.0015405306039610878, | |
| "step": 1680, | |
| "total_loss": 0.0016421698385045147, | |
| "value_loss": 0.0010163923268578401, | |
| "value_loss_search": 0.0045566821872171205, | |
| "value_loss_thought": 0.003574456366766299 | |
| }, | |
| { | |
| "epoch": 10.06, | |
| "grad_norm": 0.19891894504058574, | |
| "learning_rate": 1.4626151753259826e-06, | |
| "loss": 0.0138, | |
| "sft_loss": 0.001970075577264652, | |
| "step": 1685, | |
| "total_loss": 0.0020514145978665966, | |
| "value_loss": 0.0008133902773010959, | |
| "value_loss_search": 0.0033379198979332616, | |
| "value_loss_thought": 0.003169202328041365 | |
| }, | |
| { | |
| "epoch": 10.09, | |
| "grad_norm": 0.2844490093173974, | |
| "learning_rate": 1.4467889289153372e-06, | |
| "loss": 0.0132, | |
| "sft_loss": 0.0011790773802204057, | |
| "step": 1690, | |
| "total_loss": 0.0012578035701309887, | |
| "value_loss": 0.0007872618921737739, | |
| "value_loss_search": 0.0035334496231712365, | |
| "value_loss_thought": 0.0027646455130707183 | |
| }, | |
| { | |
| "epoch": 10.12, | |
| "grad_norm": 0.22486418307955425, | |
| "learning_rate": 1.4310138324988727e-06, | |
| "loss": 0.0132, | |
| "sft_loss": 0.0016656344232615083, | |
| "step": 1695, | |
| "total_loss": 0.0017502345744446757, | |
| "value_loss": 0.0008460015417313116, | |
| "value_loss_search": 0.0036587552590958694, | |
| "value_loss_thought": 0.0031092570511646045 | |
| }, | |
| { | |
| "epoch": 10.15, | |
| "grad_norm": 0.2753979527731448, | |
| "learning_rate": 1.415290652206105e-06, | |
| "loss": 0.0143, | |
| "sft_loss": 0.0017361613281536847, | |
| "step": 1700, | |
| "total_loss": 0.0018177182257247183, | |
| "value_loss": 0.0008155690053840203, | |
| "value_loss_search": 0.003209375508242829, | |
| "value_loss_thought": 0.003315176499722838 | |
| }, | |
| { | |
| "epoch": 10.18, | |
| "grad_norm": 0.2572570727278164, | |
| "learning_rate": 1.3996201516452062e-06, | |
| "loss": 0.0137, | |
| "sft_loss": 0.001388478121953085, | |
| "step": 1705, | |
| "total_loss": 0.0014834851837221663, | |
| "value_loss": 0.0009500706352980615, | |
| "value_loss_search": 0.0030097221551613983, | |
| "value_loss_thought": 0.004590843019104796 | |
| }, | |
| { | |
| "epoch": 10.21, | |
| "grad_norm": 0.27025436045593615, | |
| "learning_rate": 1.3840030918659174e-06, | |
| "loss": 0.0147, | |
| "sft_loss": 0.0013848344882717357, | |
| "step": 1710, | |
| "total_loss": 0.0014918723345459738, | |
| "value_loss": 0.0010703784395445838, | |
| "value_loss_search": 0.0053198799042775136, | |
| "value_loss_thought": 0.0032431475258817956 | |
| }, | |
| { | |
| "epoch": 10.23, | |
| "grad_norm": 0.2813750257238534, | |
| "learning_rate": 1.3684402313225858e-06, | |
| "loss": 0.014, | |
| "sft_loss": 0.0020835736999288202, | |
| "step": 1715, | |
| "total_loss": 0.002167087908361509, | |
| "value_loss": 0.0008351421248391944, | |
| "value_loss_search": 0.003797487074473338, | |
| "value_loss_thought": 0.0028836499267299587 | |
| }, | |
| { | |
| "epoch": 10.26, | |
| "grad_norm": 0.23513053578061044, | |
| "learning_rate": 1.3529323258373347e-06, | |
| "loss": 0.0156, | |
| "sft_loss": 0.001715753084863536, | |
| "step": 1720, | |
| "total_loss": 0.0017993840303944352, | |
| "value_loss": 0.0008363092606941791, | |
| "value_loss_search": 0.0030788765871307077, | |
| "value_loss_thought": 0.003611597566623459 | |
| }, | |
| { | |
| "epoch": 10.29, | |
| "grad_norm": 0.23227494152180417, | |
| "learning_rate": 1.3374801285633498e-06, | |
| "loss": 0.014, | |
| "sft_loss": 0.001370953200967051, | |
| "step": 1725, | |
| "total_loss": 0.0014732162911762713, | |
| "value_loss": 0.0010226307487414487, | |
| "value_loss_search": 0.0036572990339521993, | |
| "value_loss_thought": 0.004523746974246024 | |
| }, | |
| { | |
| "epoch": 10.32, | |
| "grad_norm": 0.27072849780059804, | |
| "learning_rate": 1.3220843899483093e-06, | |
| "loss": 0.0132, | |
| "sft_loss": 0.002192886942066252, | |
| "step": 1730, | |
| "total_loss": 0.0022738821006910827, | |
| "value_loss": 0.0008099516685206254, | |
| "value_loss_search": 0.0027381081928751884, | |
| "value_loss_thought": 0.0037415051008338196 | |
| }, | |
| { | |
| "epoch": 10.35, | |
| "grad_norm": 0.23754108921707667, | |
| "learning_rate": 1.3067458576979305e-06, | |
| "loss": 0.0137, | |
| "sft_loss": 0.0013820198219036683, | |
| "step": 1735, | |
| "total_loss": 0.0014648040350152768, | |
| "value_loss": 0.0008278420881822513, | |
| "value_loss_search": 0.002561309584873328, | |
| "value_loss_thought": 0.004061427062742951 | |
| }, | |
| { | |
| "epoch": 10.38, | |
| "grad_norm": 0.36161029888838353, | |
| "learning_rate": 1.2914652767396602e-06, | |
| "loss": 0.0134, | |
| "sft_loss": 0.0016668380645569413, | |
| "step": 1740, | |
| "total_loss": 0.0017722387631607718, | |
| "value_loss": 0.0010540070087955654, | |
| "value_loss_search": 0.003300642109496721, | |
| "value_loss_thought": 0.005131414054389882 | |
| }, | |
| { | |
| "epoch": 10.41, | |
| "grad_norm": 0.2974708087969646, | |
| "learning_rate": 1.2762433891865e-06, | |
| "loss": 0.0143, | |
| "sft_loss": 0.0014268789207562804, | |
| "step": 1745, | |
| "total_loss": 0.0015154178811258134, | |
| "value_loss": 0.0008853895097672648, | |
| "value_loss_search": 0.0027578251025261124, | |
| "value_loss_thought": 0.004325291005079635 | |
| }, | |
| { | |
| "epoch": 10.44, | |
| "grad_norm": 0.3676755156415561, | |
| "learning_rate": 1.2610809343009588e-06, | |
| "loss": 0.0153, | |
| "sft_loss": 0.0014077324420213699, | |
| "step": 1750, | |
| "total_loss": 0.0015097191839743118, | |
| "value_loss": 0.001019867208327696, | |
| "value_loss_search": 0.003453613588283133, | |
| "value_loss_thought": 0.004705324086262408 | |
| }, | |
| { | |
| "epoch": 10.47, | |
| "grad_norm": 0.35317661644500653, | |
| "learning_rate": 1.2459786484591535e-06, | |
| "loss": 0.0138, | |
| "sft_loss": 0.0014058848028071225, | |
| "step": 1755, | |
| "total_loss": 0.0014983110793011178, | |
| "value_loss": 0.0009242627733442532, | |
| "value_loss_search": 0.0036805602864205867, | |
| "value_loss_thought": 0.0037135419420565086 | |
| }, | |
| { | |
| "epoch": 10.5, | |
| "grad_norm": 0.3090797575849808, | |
| "learning_rate": 1.2309372651150456e-06, | |
| "loss": 0.0143, | |
| "sft_loss": 0.0013164847245207057, | |
| "step": 1760, | |
| "total_loss": 0.0014013042361533223, | |
| "value_loss": 0.0008481952301963247, | |
| "value_loss_search": 0.0033867947210950433, | |
| "value_loss_thought": 0.0033987670644137326 | |
| }, | |
| { | |
| "epoch": 10.53, | |
| "grad_norm": 0.3792937092146306, | |
| "learning_rate": 1.2159575147648226e-06, | |
| "loss": 0.0138, | |
| "sft_loss": 0.0010253429325530305, | |
| "step": 1765, | |
| "total_loss": 0.0011080630618408803, | |
| "value_loss": 0.0008272012292422914, | |
| "value_loss_search": 0.003076424640460118, | |
| "value_loss_thought": 0.0035411851821436358 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "grad_norm": 0.23588251371295496, | |
| "learning_rate": 1.2010401249114166e-06, | |
| "loss": 0.0154, | |
| "sft_loss": 0.002121089934371412, | |
| "step": 1770, | |
| "total_loss": 0.002200343585804987, | |
| "value_loss": 0.0007925363796857709, | |
| "value_loss_search": 0.0028656800206931623, | |
| "value_loss_thought": 0.0034746110001947273 | |
| }, | |
| { | |
| "epoch": 10.59, | |
| "grad_norm": 0.18704537996101944, | |
| "learning_rate": 1.1861858200291754e-06, | |
| "loss": 0.015, | |
| "sft_loss": 0.001497958108666353, | |
| "step": 1775, | |
| "total_loss": 0.0015780455702179453, | |
| "value_loss": 0.0008008746483028518, | |
| "value_loss_search": 0.003131278493640366, | |
| "value_loss_thought": 0.0032757186703292972 | |
| }, | |
| { | |
| "epoch": 10.62, | |
| "grad_norm": 0.2611143768462882, | |
| "learning_rate": 1.1713953215286786e-06, | |
| "loss": 0.0146, | |
| "sft_loss": 0.0016566726000746713, | |
| "step": 1780, | |
| "total_loss": 0.0017335941357572437, | |
| "value_loss": 0.000769215394677758, | |
| "value_loss_search": 0.0035173230212080854, | |
| "value_loss_thought": 0.0026364001052343157 | |
| }, | |
| { | |
| "epoch": 10.65, | |
| "grad_norm": 0.22627379212344326, | |
| "learning_rate": 1.156669347721698e-06, | |
| "loss": 0.0139, | |
| "sft_loss": 0.0013101978547638282, | |
| "step": 1785, | |
| "total_loss": 0.0014014345401903937, | |
| "value_loss": 0.0009123669141331448, | |
| "value_loss_search": 0.0029923125522913095, | |
| "value_loss_thought": 0.004306622803687788 | |
| }, | |
| { | |
| "epoch": 10.68, | |
| "grad_norm": 0.2755772125737796, | |
| "learning_rate": 1.1420086137863187e-06, | |
| "loss": 0.0147, | |
| "sft_loss": 0.0017199999798322096, | |
| "step": 1790, | |
| "total_loss": 0.0018377338240952667, | |
| "value_loss": 0.0011773384410275866, | |
| "value_loss_search": 0.004846148737328804, | |
| "value_loss_thought": 0.004572558852294151 | |
| }, | |
| { | |
| "epoch": 10.71, | |
| "grad_norm": 0.32880460241811366, | |
| "learning_rate": 1.127413831732198e-06, | |
| "loss": 0.0139, | |
| "sft_loss": 0.001667250582249835, | |
| "step": 1795, | |
| "total_loss": 0.0017713467202923993, | |
| "value_loss": 0.0010409614306340132, | |
| "value_loss_search": 0.0038816104845068367, | |
| "value_loss_thought": 0.00444608086813787 | |
| }, | |
| { | |
| "epoch": 10.74, | |
| "grad_norm": 0.31078844068026645, | |
| "learning_rate": 1.1128857103659924e-06, | |
| "loss": 0.0148, | |
| "sft_loss": 0.0020905163779389112, | |
| "step": 1800, | |
| "total_loss": 0.0021922153910395536, | |
| "value_loss": 0.0010169901736389875, | |
| "value_loss_search": 0.0040109903552888685, | |
| "value_loss_thought": 0.004124930962098006 | |
| }, | |
| { | |
| "epoch": 10.77, | |
| "grad_norm": 0.25337131057461965, | |
| "learning_rate": 1.098424955256929e-06, | |
| "loss": 0.0146, | |
| "sft_loss": 0.0014486652828054503, | |
| "step": 1805, | |
| "total_loss": 0.0015391261362040609, | |
| "value_loss": 0.0009046085265993042, | |
| "value_loss_search": 0.0039972110742269255, | |
| "value_loss_thought": 0.0032396571875779046 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "grad_norm": 0.27541225586561385, | |
| "learning_rate": 1.084032268702546e-06, | |
| "loss": 0.016, | |
| "sft_loss": 0.0018587965198094026, | |
| "step": 1810, | |
| "total_loss": 0.0019315761407113995, | |
| "value_loss": 0.0007277963281012489, | |
| "value_loss_search": 0.002842529457825549, | |
| "value_loss_thought": 0.0029798412312629806 | |
| }, | |
| { | |
| "epoch": 10.83, | |
| "grad_norm": 0.3143782682726898, | |
| "learning_rate": 1.0697083496945766e-06, | |
| "loss": 0.0147, | |
| "sft_loss": 0.0016238773765508085, | |
| "step": 1815, | |
| "total_loss": 0.0017014974511752711, | |
| "value_loss": 0.0007762006878408556, | |
| "value_loss_search": 0.003306770018977545, | |
| "value_loss_thought": 0.002902835555346428 | |
| }, | |
| { | |
| "epoch": 10.86, | |
| "grad_norm": 0.20528023876815246, | |
| "learning_rate": 1.0554538938850067e-06, | |
| "loss": 0.0127, | |
| "sft_loss": 0.0010624434828059748, | |
| "step": 1820, | |
| "total_loss": 0.0011552063171336613, | |
| "value_loss": 0.0009276282694486327, | |
| "value_loss_search": 0.0039089625636478335, | |
| "value_loss_thought": 0.0035120635659950496 | |
| }, | |
| { | |
| "epoch": 10.89, | |
| "grad_norm": 0.3193867192070165, | |
| "learning_rate": 1.0412695935522915e-06, | |
| "loss": 0.0139, | |
| "sft_loss": 0.0015649513312382623, | |
| "step": 1825, | |
| "total_loss": 0.001644154928186481, | |
| "value_loss": 0.0007920360109665125, | |
| "value_loss_search": 0.003427566871346244, | |
| "value_loss_thought": 0.0029087212127251404 | |
| }, | |
| { | |
| "epoch": 10.92, | |
| "grad_norm": 0.2485596631221815, | |
| "learning_rate": 1.0271561375677295e-06, | |
| "loss": 0.0141, | |
| "sft_loss": 0.0012402831809595228, | |
| "step": 1830, | |
| "total_loss": 0.0013410342163723498, | |
| "value_loss": 0.0010075102685675574, | |
| "value_loss_search": 0.0032375619698996163, | |
| "value_loss_thought": 0.004822520132569253 | |
| }, | |
| { | |
| "epoch": 10.95, | |
| "grad_norm": 0.19629796573946343, | |
| "learning_rate": 1.0131142113620124e-06, | |
| "loss": 0.0145, | |
| "sft_loss": 0.0017903442290844395, | |
| "step": 1835, | |
| "total_loss": 0.0018783345309895338, | |
| "value_loss": 0.0008799031274634217, | |
| "value_loss_search": 0.002568782726038421, | |
| "value_loss_thought": 0.00447044234929308 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "grad_norm": 0.22665789788022606, | |
| "learning_rate": 9.991444968919318e-07, | |
| "loss": 0.016, | |
| "sft_loss": 0.001909774899831973, | |
| "step": 1840, | |
| "total_loss": 0.0020184893559246574, | |
| "value_loss": 0.0010871445186012353, | |
| "value_loss_search": 0.0036259381746958754, | |
| "value_loss_thought": 0.005071217814185047 | |
| }, | |
| { | |
| "epoch": 11.01, | |
| "grad_norm": 0.14121531206089535, | |
| "learning_rate": 9.85247672607262e-07, | |
| "loss": 0.0135, | |
| "sft_loss": 0.0015843365341424941, | |
| "step": 1845, | |
| "total_loss": 0.0016597445963839163, | |
| "value_loss": 0.0007540804324889904, | |
| "value_loss_search": 0.00248998072215727, | |
| "value_loss_thought": 0.0035426627241349705 | |
| }, | |
| { | |
| "epoch": 11.04, | |
| "grad_norm": 0.23904440865103438, | |
| "learning_rate": 9.714244134178111e-07, | |
| "loss": 0.0121, | |
| "sft_loss": 0.001681867046863772, | |
| "step": 1850, | |
| "total_loss": 0.0017365749472503466, | |
| "value_loss": 0.000547078986141969, | |
| "value_loss_search": 0.002276956337368574, | |
| "value_loss_thought": 0.0020996756104807446 | |
| }, | |
| { | |
| "epoch": 11.07, | |
| "grad_norm": 0.21369722290951315, | |
| "learning_rate": 9.576753906606406e-07, | |
| "loss": 0.0132, | |
| "sft_loss": 0.001991357470978983, | |
| "step": 1855, | |
| "total_loss": 0.0020535454949708763, | |
| "value_loss": 0.0006218802197111017, | |
| "value_loss_search": 0.002578609814440824, | |
| "value_loss_thought": 0.002396431967076751 | |
| }, | |
| { | |
| "epoch": 11.1, | |
| "grad_norm": 0.23349687558798843, | |
| "learning_rate": 9.440012720674669e-07, | |
| "loss": 0.0134, | |
| "sft_loss": 0.0016908970777876676, | |
| "step": 1860, | |
| "total_loss": 0.0017617013153142125, | |
| "value_loss": 0.0007080423307911588, | |
| "value_loss_search": 0.002591455921628949, | |
| "value_loss_thought": 0.003072882712245928 | |
| }, | |
| { | |
| "epoch": 11.13, | |
| "grad_norm": 0.2315106358683564, | |
| "learning_rate": 9.304027217322248e-07, | |
| "loss": 0.012, | |
| "sft_loss": 0.0011010443093255162, | |
| "step": 1865, | |
| "total_loss": 0.001180766790723453, | |
| "value_loss": 0.0007972249053409541, | |
| "value_loss_search": 0.0028063702089980323, | |
| "value_loss_thought": 0.003571429059320508 | |
| }, | |
| { | |
| "epoch": 11.16, | |
| "grad_norm": 0.19676507857751727, | |
| "learning_rate": 9.168804000788231e-07, | |
| "loss": 0.0134, | |
| "sft_loss": 0.0017542458925163373, | |
| "step": 1870, | |
| "total_loss": 0.0018465204406766134, | |
| "value_loss": 0.0009227454697338544, | |
| "value_loss_search": 0.0034655480293684705, | |
| "value_loss_thought": 0.003916415683534069 | |
| }, | |
| { | |
| "epoch": 11.19, | |
| "grad_norm": 0.20994293249210688, | |
| "learning_rate": 9.034349638290643e-07, | |
| "loss": 0.0129, | |
| "sft_loss": 0.0016363047616323456, | |
| "step": 1875, | |
| "total_loss": 0.001720267212360227, | |
| "value_loss": 0.000839624490978963, | |
| "value_loss_search": 0.0023668420385547506, | |
| "value_loss_thought": 0.004350153919847344 | |
| }, | |
| { | |
| "epoch": 11.22, | |
| "grad_norm": 0.17348271881719035, | |
| "learning_rate": 8.90067065970753e-07, | |
| "loss": 0.0117, | |
| "sft_loss": 0.0015204125025775283, | |
| "step": 1880, | |
| "total_loss": 0.00157785642841759, | |
| "value_loss": 0.0005744392913584306, | |
| "value_loss_search": 0.002059218621488412, | |
| "value_loss_thought": 0.0025362957071592973 | |
| }, | |
| { | |
| "epoch": 11.25, | |
| "grad_norm": 0.2244106678448663, | |
| "learning_rate": 8.767773557259856e-07, | |
| "loss": 0.0136, | |
| "sft_loss": 0.0018200392310973256, | |
| "step": 1885, | |
| "total_loss": 0.0018952935279571647, | |
| "value_loss": 0.0007525429511019866, | |
| "value_loss_search": 0.0032326324541486428, | |
| "value_loss_thought": 0.002787711161090556 | |
| }, | |
| { | |
| "epoch": 11.28, | |
| "grad_norm": 0.22278002454347204, | |
| "learning_rate": 8.635664785196149e-07, | |
| "loss": 0.0136, | |
| "sft_loss": 0.001562042610021308, | |
| "step": 1890, | |
| "total_loss": 0.0016376418375557479, | |
| "value_loss": 0.0007559922805285169, | |
| "value_loss_search": 0.002203879163846523, | |
| "value_loss_thought": 0.0038440591039261562 | |
| }, | |
| { | |
| "epoch": 11.31, | |
| "grad_norm": 0.16580402380208306, | |
| "learning_rate": 8.504350759479085e-07, | |
| "loss": 0.0132, | |
| "sft_loss": 0.001630876283161342, | |
| "step": 1895, | |
| "total_loss": 0.0017215145897331752, | |
| "value_loss": 0.0009063829461979367, | |
| "value_loss_search": 0.0032823619387357893, | |
| "value_loss_thought": 0.0039687016303560085 | |
| }, | |
| { | |
| "epoch": 11.34, | |
| "grad_norm": 0.1538178503850748, | |
| "learning_rate": 8.373837857473876e-07, | |
| "loss": 0.0124, | |
| "sft_loss": 0.0015245357761159539, | |
| "step": 1900, | |
| "total_loss": 0.001594149377508103, | |
| "value_loss": 0.0006961359632555286, | |
| "value_loss_search": 0.0029453484618215953, | |
| "value_loss_thought": 0.0026237392520670256 | |
| }, | |
| { | |
| "epoch": 11.37, | |
| "grad_norm": 0.22852979054995423, | |
| "learning_rate": 8.244132417638572e-07, | |
| "loss": 0.0139, | |
| "sft_loss": 0.0018581276090117171, | |
| "step": 1905, | |
| "total_loss": 0.0019236322099402514, | |
| "value_loss": 0.0006550459199843317, | |
| "value_loss_search": 0.002184331477405976, | |
| "value_loss_thought": 0.0030560358827074197 | |
| }, | |
| { | |
| "epoch": 11.4, | |
| "grad_norm": 0.17979282429716514, | |
| "learning_rate": 8.115240739216182e-07, | |
| "loss": 0.0135, | |
| "sft_loss": 0.0016053789819125085, | |
| "step": 1910, | |
| "total_loss": 0.0016754228590059484, | |
| "value_loss": 0.0007004386914104543, | |
| "value_loss_search": 0.0024565162927842723, | |
| "value_loss_thought": 0.003146993195559844 | |
| }, | |
| { | |
| "epoch": 11.43, | |
| "grad_norm": 0.2143766598366511, | |
| "learning_rate": 7.987169081928808e-07, | |
| "loss": 0.0113, | |
| "sft_loss": 0.0012951746117323636, | |
| "step": 1915, | |
| "total_loss": 0.0013838895008234432, | |
| "value_loss": 0.0008871489512898734, | |
| "value_loss_search": 0.0030084542974748276, | |
| "value_loss_thought": 0.004088737367703743 | |
| }, | |
| { | |
| "epoch": 11.46, | |
| "grad_norm": 0.16901429392294579, | |
| "learning_rate": 7.859923665673577e-07, | |
| "loss": 0.0138, | |
| "sft_loss": 0.0017941196507308631, | |
| "step": 1920, | |
| "total_loss": 0.0018495924976690502, | |
| "value_loss": 0.0005547285868487961, | |
| "value_loss_search": 0.002195003875567636, | |
| "value_loss_thought": 0.0022428248138567143 | |
| }, | |
| { | |
| "epoch": 11.49, | |
| "grad_norm": 0.14963676330146067, | |
| "learning_rate": 7.733510670220592e-07, | |
| "loss": 0.0127, | |
| "sft_loss": 0.0016209140827413647, | |
| "step": 1925, | |
| "total_loss": 0.0016745970182368453, | |
| "value_loss": 0.0005368294205496226, | |
| "value_loss_search": 0.0021613026585526997, | |
| "value_loss_thought": 0.0021333327193019612 | |
| }, | |
| { | |
| "epoch": 11.52, | |
| "grad_norm": 0.1560856898990678, | |
| "learning_rate": 7.607936234912841e-07, | |
| "loss": 0.0128, | |
| "sft_loss": 0.0012635959719773382, | |
| "step": 1930, | |
| "total_loss": 0.0013598649700924171, | |
| "value_loss": 0.0009626899960949231, | |
| "value_loss_search": 0.003141054221191553, | |
| "value_loss_thought": 0.004560465800022939 | |
| }, | |
| { | |
| "epoch": 11.55, | |
| "grad_norm": 0.1963172359932867, | |
| "learning_rate": 7.48320645836797e-07, | |
| "loss": 0.013, | |
| "sft_loss": 0.001730994725949131, | |
| "step": 1935, | |
| "total_loss": 0.0018184438404333036, | |
| "value_loss": 0.0008744911316171056, | |
| "value_loss_search": 0.0031509772675512695, | |
| "value_loss_thought": 0.003844951791325002 | |
| }, | |
| { | |
| "epoch": 11.58, | |
| "grad_norm": 0.2668333028531531, | |
| "learning_rate": 7.359327398182145e-07, | |
| "loss": 0.0129, | |
| "sft_loss": 0.0013168820936698467, | |
| "step": 1940, | |
| "total_loss": 0.0013926543368199874, | |
| "value_loss": 0.0007577225714385349, | |
| "value_loss_search": 0.003037126008916857, | |
| "value_loss_thought": 0.003024654608429955 | |
| }, | |
| { | |
| "epoch": 11.61, | |
| "grad_norm": 0.222868660291957, | |
| "learning_rate": 7.236305070635835e-07, | |
| "loss": 0.0143, | |
| "sft_loss": 0.0016936144500505179, | |
| "step": 1945, | |
| "total_loss": 0.0017581287969392178, | |
| "value_loss": 0.0006451433262554928, | |
| "value_loss_search": 0.0023534947292660037, | |
| "value_loss_thought": 0.002807651879811601 | |
| }, | |
| { | |
| "epoch": 11.64, | |
| "grad_norm": 0.361284370578401, | |
| "learning_rate": 7.114145450401666e-07, | |
| "loss": 0.0128, | |
| "sft_loss": 0.001547012195806019, | |
| "step": 1950, | |
| "total_loss": 0.001618750787525869, | |
| "value_loss": 0.000717385778938251, | |
| "value_loss_search": 0.0024508829355568197, | |
| "value_loss_thought": 0.0032882033142414002 | |
| }, | |
| { | |
| "epoch": 11.67, | |
| "grad_norm": 0.13552700047134153, | |
| "learning_rate": 6.992854470254207e-07, | |
| "loss": 0.013, | |
| "sft_loss": 0.001385022871545516, | |
| "step": 1955, | |
| "total_loss": 0.0014542039817172282, | |
| "value_loss": 0.0006918109331763844, | |
| "value_loss_search": 0.0027121378843503407, | |
| "value_loss_thought": 0.0028223496028431327 | |
| }, | |
| { | |
| "epoch": 11.7, | |
| "grad_norm": 0.22110946390490138, | |
| "learning_rate": 6.872438020781855e-07, | |
| "loss": 0.0133, | |
| "sft_loss": 0.0019112714886432513, | |
| "step": 1960, | |
| "total_loss": 0.0021918389610931397, | |
| "value_loss": 0.002805674577484751, | |
| "value_loss_search": 0.0025011180584272098, | |
| "value_loss_thought": 0.01994427887461825 | |
| }, | |
| { | |
| "epoch": 11.73, | |
| "grad_norm": 0.19717560920606997, | |
| "learning_rate": 6.752901950100796e-07, | |
| "loss": 0.0135, | |
| "sft_loss": 0.0014557878545019775, | |
| "step": 1965, | |
| "total_loss": 0.0015295138876126658, | |
| "value_loss": 0.0007372604449756182, | |
| "value_loss_search": 0.0031169248416858864, | |
| "value_loss_thought": 0.0027811587181304277 | |
| }, | |
| { | |
| "epoch": 11.76, | |
| "grad_norm": 0.19011787464804747, | |
| "learning_rate": 6.634252063570909e-07, | |
| "loss": 0.0133, | |
| "sft_loss": 0.0018082802678691223, | |
| "step": 1970, | |
| "total_loss": 0.0018666912741366558, | |
| "value_loss": 0.0005841100078200157, | |
| "value_loss_search": 0.0022486982484906546, | |
| "value_loss_thought": 0.0024241818446625986 | |
| }, | |
| { | |
| "epoch": 11.79, | |
| "grad_norm": 0.1689996116493542, | |
| "learning_rate": 6.516494123513911e-07, | |
| "loss": 0.0121, | |
| "sft_loss": 0.0014497063646558672, | |
| "step": 1975, | |
| "total_loss": 0.0015385708714291014, | |
| "value_loss": 0.0008886449425062892, | |
| "value_loss_search": 0.0024655752657849915, | |
| "value_loss_thought": 0.0046435843172275785 | |
| }, | |
| { | |
| "epoch": 11.82, | |
| "grad_norm": 0.15943109781856352, | |
| "learning_rate": 6.399633848933434e-07, | |
| "loss": 0.0133, | |
| "sft_loss": 0.0014925144583685323, | |
| "step": 1980, | |
| "total_loss": 0.0015526211048978666, | |
| "value_loss": 0.0006010664543737221, | |
| "value_loss_search": 0.0027871506780570597, | |
| "value_loss_thought": 0.0020213810148561606 | |
| }, | |
| { | |
| "epoch": 11.85, | |
| "grad_norm": 0.16717165760347577, | |
| "learning_rate": 6.283676915237307e-07, | |
| "loss": 0.013, | |
| "sft_loss": 0.001696960357367061, | |
| "step": 1985, | |
| "total_loss": 0.0017603883717299596, | |
| "value_loss": 0.0006342800129914394, | |
| "value_loss_search": 0.002824323731066869, | |
| "value_loss_thought": 0.0022499163406337177 | |
| }, | |
| { | |
| "epoch": 11.88, | |
| "grad_norm": 0.15142680254994328, | |
| "learning_rate": 6.16862895396193e-07, | |
| "loss": 0.0139, | |
| "sft_loss": 0.001581608026754111, | |
| "step": 1990, | |
| "total_loss": 0.0016601021626520662, | |
| "value_loss": 0.0007849412620771545, | |
| "value_loss_search": 0.002948937653894745, | |
| "value_loss_thought": 0.003330592447127856 | |
| }, | |
| { | |
| "epoch": 11.91, | |
| "grad_norm": 0.19276901421529766, | |
| "learning_rate": 6.054495552498779e-07, | |
| "loss": 0.0137, | |
| "sft_loss": 0.0018120755994459614, | |
| "step": 1995, | |
| "total_loss": 0.001886463062464827, | |
| "value_loss": 0.0007438746942170837, | |
| "value_loss_search": 0.0030197886319228927, | |
| "value_loss_thought": 0.0029312088817277982 | |
| }, | |
| { | |
| "epoch": 11.94, | |
| "grad_norm": 0.15841124656299638, | |
| "learning_rate": 5.941282253823019e-07, | |
| "loss": 0.0124, | |
| "sft_loss": 0.0017132775596110151, | |
| "step": 2000, | |
| "total_loss": 0.0018226395098338345, | |
| "value_loss": 0.0010936193576071673, | |
| "value_loss_search": 0.004504057419785568, | |
| "value_loss_thought": 0.004244897459489039 | |
| }, | |
| { | |
| "epoch": 11.97, | |
| "grad_norm": 0.19080328469455396, | |
| "learning_rate": 5.828994556224333e-07, | |
| "loss": 0.0133, | |
| "sft_loss": 0.0016100038104923443, | |
| "step": 2005, | |
| "total_loss": 0.001695921379121046, | |
| "value_loss": 0.0008591757061367389, | |
| "value_loss_search": 0.0032462079869560513, | |
| "value_loss_thought": 0.0036271976513944535 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.42540669262730385, | |
| "learning_rate": 5.717637913039895e-07, | |
| "loss": 0.0142, | |
| "sft_loss": 0.0014130673225736246, | |
| "step": 2010, | |
| "total_loss": 0.001474345298233004, | |
| "value_loss": 0.0006127797149346748, | |
| "value_loss_search": 0.0022098295013165624, | |
| "value_loss_thought": 0.002692408211157726 | |
| }, | |
| { | |
| "epoch": 12.03, | |
| "grad_norm": 0.13234597224013306, | |
| "learning_rate": 5.607217732389503e-07, | |
| "loss": 0.0134, | |
| "sft_loss": 0.00153753467311617, | |
| "step": 2015, | |
| "total_loss": 0.0015983398959235728, | |
| "value_loss": 0.0006080522839738478, | |
| "value_loss_search": 0.002334933211577095, | |
| "value_loss_thought": 0.002529485058130376 | |
| }, | |
| { | |
| "epoch": 12.06, | |
| "grad_norm": 0.137160131767248, | |
| "learning_rate": 5.497739376912956e-07, | |
| "loss": 0.0109, | |
| "sft_loss": 0.0011861874081660062, | |
| "step": 2020, | |
| "total_loss": 0.0012396710289571899, | |
| "value_loss": 0.0005348361521200218, | |
| "value_loss_search": 0.0019204338413487676, | |
| "value_loss_thought": 0.002358255369199469 | |
| }, | |
| { | |
| "epoch": 12.09, | |
| "grad_norm": 0.12635424611743207, | |
| "learning_rate": 5.389208163509585e-07, | |
| "loss": 0.0132, | |
| "sft_loss": 0.0018190979899372905, | |
| "step": 2025, | |
| "total_loss": 0.001879164192882854, | |
| "value_loss": 0.0006006620240896155, | |
| "value_loss_search": 0.0024004459903380847, | |
| "value_loss_thought": 0.002404850223280164 | |
| }, | |
| { | |
| "epoch": 12.11, | |
| "grad_norm": 0.18160270722099203, | |
| "learning_rate": 5.281629363080054e-07, | |
| "loss": 0.0122, | |
| "sft_loss": 0.0015674298861995339, | |
| "step": 2030, | |
| "total_loss": 0.001623824713238875, | |
| "value_loss": 0.0005639481589923889, | |
| "value_loss_search": 0.002164925300516529, | |
| "value_loss_thought": 0.002346659959630415 | |
| }, | |
| { | |
| "epoch": 12.14, | |
| "grad_norm": 0.1417342739770937, | |
| "learning_rate": 5.175008200270368e-07, | |
| "loss": 0.0125, | |
| "sft_loss": 0.0014136525540379807, | |
| "step": 2035, | |
| "total_loss": 0.0014688616164136193, | |
| "value_loss": 0.0005520905897583361, | |
| "value_loss_search": 0.0017787457187345979, | |
| "value_loss_thought": 0.0026379790163218787 | |
| }, | |
| { | |
| "epoch": 12.17, | |
| "grad_norm": 0.1290358512992214, | |
| "learning_rate": 5.06934985321813e-07, | |
| "loss": 0.0113, | |
| "sft_loss": 0.0016105213377159088, | |
| "step": 2040, | |
| "total_loss": 0.0016679565724615486, | |
| "value_loss": 0.0005743524450053883, | |
| "value_loss_search": 0.0021123728056977598, | |
| "value_loss_thought": 0.0024824467762755375 | |
| }, | |
| { | |
| "epoch": 12.2, | |
| "grad_norm": 0.17303760430312853, | |
| "learning_rate": 4.964659453301088e-07, | |
| "loss": 0.0125, | |
| "sft_loss": 0.0017713219043798746, | |
| "step": 2045, | |
| "total_loss": 0.001820996348112658, | |
| "value_loss": 0.0004967445772763313, | |
| "value_loss_search": 0.0021500766970802944, | |
| "value_loss_thought": 0.0018238799311802723 | |
| }, | |
| { | |
| "epoch": 12.23, | |
| "grad_norm": 0.13656860205410482, | |
| "learning_rate": 4.860942084887868e-07, | |
| "loss": 0.012, | |
| "sft_loss": 0.0011710747960023582, | |
| "step": 2050, | |
| "total_loss": 0.0012223293429954652, | |
| "value_loss": 0.0005125455333654827, | |
| "value_loss_search": 0.0025454429445233018, | |
| "value_loss_thought": 0.0015549213211500045 | |
| }, | |
| { | |
| "epoch": 12.26, | |
| "grad_norm": 0.17146623033662603, | |
| "learning_rate": 4.758202785091118e-07, | |
| "loss": 0.0122, | |
| "sft_loss": 0.0015416829177411274, | |
| "step": 2055, | |
| "total_loss": 0.0015948826805882276, | |
| "value_loss": 0.0005319975216764306, | |
| "value_loss_search": 0.0020023466028078474, | |
| "value_loss_thought": 0.002253633565032942 | |
| }, | |
| { | |
| "epoch": 12.29, | |
| "grad_norm": 0.13131858410389194, | |
| "learning_rate": 4.656446543522822e-07, | |
| "loss": 0.0114, | |
| "sft_loss": 0.0015963483776431531, | |
| "step": 2060, | |
| "total_loss": 0.0016545026784314132, | |
| "value_loss": 0.000581543093721848, | |
| "value_loss_search": 0.002123642799631398, | |
| "value_loss_thought": 0.00252870192825867 | |
| }, | |
| { | |
| "epoch": 12.32, | |
| "grad_norm": 0.1387225576415455, | |
| "learning_rate": 4.555678302051988e-07, | |
| "loss": 0.0125, | |
| "sft_loss": 0.0011560205864952877, | |
| "step": 2065, | |
| "total_loss": 0.0012139756489808918, | |
| "value_loss": 0.0005795506685103647, | |
| "value_loss_search": 0.0022142785405321773, | |
| "value_loss_thought": 0.0024221268018152385 | |
| }, | |
| { | |
| "epoch": 12.35, | |
| "grad_norm": 0.13838555141205885, | |
| "learning_rate": 4.4559029545646835e-07, | |
| "loss": 0.0128, | |
| "sft_loss": 0.0019845320377498863, | |
| "step": 2070, | |
| "total_loss": 0.002043302868487018, | |
| "value_loss": 0.0005877082873098516, | |
| "value_loss_search": 0.001886277649805379, | |
| "value_loss_thought": 0.0028153886438985866 | |
| }, | |
| { | |
| "epoch": 12.38, | |
| "grad_norm": 0.1372443532006686, | |
| "learning_rate": 4.357125346726293e-07, | |
| "loss": 0.013, | |
| "sft_loss": 0.001509127317694947, | |
| "step": 2075, | |
| "total_loss": 0.0015627349823276403, | |
| "value_loss": 0.0005360765391628775, | |
| "value_loss_search": 0.0020487196703186327, | |
| "value_loss_thought": 0.0022398926420464705 | |
| }, | |
| { | |
| "epoch": 12.41, | |
| "grad_norm": 0.13844786045080867, | |
| "learning_rate": 4.2593502757462326e-07, | |
| "loss": 0.0122, | |
| "sft_loss": 0.0017603133688680827, | |
| "step": 2080, | |
| "total_loss": 0.0018168458402463728, | |
| "value_loss": 0.0005653246701513126, | |
| "value_loss_search": 0.0017796060631980026, | |
| "value_loss_thought": 0.0027429913518062675 | |
| }, | |
| { | |
| "epoch": 12.44, | |
| "grad_norm": 0.13268164718583428, | |
| "learning_rate": 4.162582490144948e-07, | |
| "loss": 0.0134, | |
| "sft_loss": 0.001744911610148847, | |
| "step": 2085, | |
| "total_loss": 0.001791581775782447, | |
| "value_loss": 0.0004667015931431706, | |
| "value_loss_search": 0.002076370053646315, | |
| "value_loss_thought": 0.0016572426828588504 | |
| }, | |
| { | |
| "epoch": 12.47, | |
| "grad_norm": 0.1639448919467817, | |
| "learning_rate": 4.066826689523329e-07, | |
| "loss": 0.0124, | |
| "sft_loss": 0.0014559761038981378, | |
| "step": 2090, | |
| "total_loss": 0.0015073618816018098, | |
| "value_loss": 0.0005138578154856077, | |
| "value_loss_search": 0.0021935081386459387, | |
| "value_loss_thought": 0.0019173543787928792 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "grad_norm": 0.12404791787908588, | |
| "learning_rate": 3.972087524334417e-07, | |
| "loss": 0.0129, | |
| "sft_loss": 0.0014528931671520696, | |
| "step": 2095, | |
| "total_loss": 0.0015010984525360982, | |
| "value_loss": 0.00048205279412059097, | |
| "value_loss_search": 0.0017939364208864107, | |
| "value_loss_thought": 0.002062485937221936 | |
| }, | |
| { | |
| "epoch": 12.53, | |
| "grad_norm": 0.11913134655746999, | |
| "learning_rate": 3.8783695956576104e-07, | |
| "loss": 0.0124, | |
| "sft_loss": 0.0014827497507212684, | |
| "step": 2100, | |
| "total_loss": 0.001538775590775998, | |
| "value_loss": 0.0005602583627023705, | |
| "value_loss_search": 0.0021692122728495635, | |
| "value_loss_thought": 0.0023128546302928045 | |
| }, | |
| { | |
| "epoch": 12.56, | |
| "grad_norm": 0.2114888612576018, | |
| "learning_rate": 3.785677454975162e-07, | |
| "loss": 0.0129, | |
| "sft_loss": 0.0016362351918360219, | |
| "step": 2105, | |
| "total_loss": 0.0016834996304623928, | |
| "value_loss": 0.00047264444830261707, | |
| "value_loss_search": 0.0016264815088106843, | |
| "value_loss_thought": 0.002154674075870844 | |
| }, | |
| { | |
| "epoch": 12.59, | |
| "grad_norm": 0.15216955248234498, | |
| "learning_rate": 3.6940156039511536e-07, | |
| "loss": 0.0131, | |
| "sft_loss": 0.0011432622733991594, | |
| "step": 2110, | |
| "total_loss": 0.001217301837471041, | |
| "value_loss": 0.0007403955911058802, | |
| "value_loss_search": 0.003449674524733837, | |
| "value_loss_thought": 0.00247349016779026 | |
| }, | |
| { | |
| "epoch": 12.62, | |
| "grad_norm": 0.16263392439324273, | |
| "learning_rate": 3.603388494212892e-07, | |
| "loss": 0.013, | |
| "sft_loss": 0.0014238910138374195, | |
| "step": 2115, | |
| "total_loss": 0.001483250133594538, | |
| "value_loss": 0.0005935910872267413, | |
| "value_loss_search": 0.0018694708225950763, | |
| "value_loss_thought": 0.0028792579010541884 | |
| }, | |
| { | |
| "epoch": 12.65, | |
| "grad_norm": 0.1366455163894168, | |
| "learning_rate": 3.5138005271346643e-07, | |
| "loss": 0.0125, | |
| "sft_loss": 0.0016499412042321638, | |
| "step": 2120, | |
| "total_loss": 0.001711774785312059, | |
| "value_loss": 0.0006183358492023672, | |
| "value_loss_search": 0.002253155542811669, | |
| "value_loss_thought": 0.002693531273280314 | |
| }, | |
| { | |
| "epoch": 12.68, | |
| "grad_norm": 0.1723979401773517, | |
| "learning_rate": 3.425256053624013e-07, | |
| "loss": 0.0136, | |
| "sft_loss": 0.0012428588001057506, | |
| "step": 2125, | |
| "total_loss": 0.0012996991815896308, | |
| "value_loss": 0.0005684036681486759, | |
| "value_loss_search": 0.002460998028504946, | |
| "value_loss_thought": 0.002086231320936349 | |
| }, | |
| { | |
| "epoch": 12.71, | |
| "grad_norm": 0.13838742752770622, | |
| "learning_rate": 3.3377593739104207e-07, | |
| "loss": 0.0124, | |
| "sft_loss": 0.001705869528814219, | |
| "step": 2130, | |
| "total_loss": 0.0017658297125933587, | |
| "value_loss": 0.0005996018458176878, | |
| "value_loss_search": 0.0025955964968829904, | |
| "value_loss_thought": 0.002201218291384066 | |
| }, | |
| { | |
| "epoch": 12.74, | |
| "grad_norm": 0.11587188180305923, | |
| "learning_rate": 3.2513147373364864e-07, | |
| "loss": 0.0119, | |
| "sft_loss": 0.001407682741410099, | |
| "step": 2135, | |
| "total_loss": 0.0014551524436541285, | |
| "value_loss": 0.0004746971244514953, | |
| "value_loss_search": 0.0019987275512676206, | |
| "value_loss_thought": 0.0017988494690371227 | |
| }, | |
| { | |
| "epoch": 12.77, | |
| "grad_norm": 0.14689001469256818, | |
| "learning_rate": 3.165926342151518e-07, | |
| "loss": 0.0123, | |
| "sft_loss": 0.0012021351605653763, | |
| "step": 2140, | |
| "total_loss": 0.0012947583980004395, | |
| "value_loss": 0.0009262323781513259, | |
| "value_loss_search": 0.002333370926893963, | |
| "value_loss_thought": 0.005076488072023722 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "grad_norm": 0.19822181831478017, | |
| "learning_rate": 3.0815983353076647e-07, | |
| "loss": 0.0128, | |
| "sft_loss": 0.0016310029430314898, | |
| "step": 2145, | |
| "total_loss": 0.0016860840906559814, | |
| "value_loss": 0.0005508116572968902, | |
| "value_loss_search": 0.0021066489629220086, | |
| "value_loss_thought": 0.0022998442878133575 | |
| }, | |
| { | |
| "epoch": 12.83, | |
| "grad_norm": 0.16816840054773924, | |
| "learning_rate": 2.998334812258524e-07, | |
| "loss": 0.0108, | |
| "sft_loss": 0.0014653883612481878, | |
| "step": 2150, | |
| "total_loss": 0.0015446662450301573, | |
| "value_loss": 0.0007927789123186813, | |
| "value_loss_search": 0.0023768133905377907, | |
| "value_loss_thought": 0.00396541793236338 | |
| }, | |
| { | |
| "epoch": 12.86, | |
| "grad_norm": 0.16375843499526804, | |
| "learning_rate": 2.9161398167602053e-07, | |
| "loss": 0.0126, | |
| "sft_loss": 0.001740490208612755, | |
| "step": 2155, | |
| "total_loss": 0.0018277797405071184, | |
| "value_loss": 0.0008728954095772679, | |
| "value_loss_search": 0.0033813179442745422, | |
| "value_loss_thought": 0.003601845331900222 | |
| }, | |
| { | |
| "epoch": 12.89, | |
| "grad_norm": 0.24414748866003813, | |
| "learning_rate": 2.8350173406749975e-07, | |
| "loss": 0.0131, | |
| "sft_loss": 0.0017086814332287759, | |
| "step": 2160, | |
| "total_loss": 0.001792904332711487, | |
| "value_loss": 0.0008422289131431171, | |
| "value_loss_search": 0.002163193000239971, | |
| "value_loss_thought": 0.0045746383284949845 | |
| }, | |
| { | |
| "epoch": 12.92, | |
| "grad_norm": 0.14738499094459379, | |
| "learning_rate": 2.75497132377745e-07, | |
| "loss": 0.0126, | |
| "sft_loss": 0.0015413039014674722, | |
| "step": 2165, | |
| "total_loss": 0.0015857949202086274, | |
| "value_loss": 0.00044491018940107097, | |
| "value_loss_search": 0.0015283264680988396, | |
| "value_loss_thought": 0.0020309550552781276 | |
| }, | |
| { | |
| "epoch": 12.95, | |
| "grad_norm": 0.1314803478632567, | |
| "learning_rate": 2.676005653563063e-07, | |
| "loss": 0.0122, | |
| "sft_loss": 0.0012932573270518332, | |
| "step": 2170, | |
| "total_loss": 0.0013801320299876353, | |
| "value_loss": 0.0008687470835752719, | |
| "value_loss_search": 0.0031730859160461478, | |
| "value_loss_thought": 0.0037768907095369285 | |
| }, | |
| { | |
| "epoch": 12.98, | |
| "grad_norm": 0.15101872666959495, | |
| "learning_rate": 2.5981241650594736e-07, | |
| "loss": 0.0116, | |
| "sft_loss": 0.001474944083020091, | |
| "step": 2175, | |
| "total_loss": 0.0015574592757964466, | |
| "value_loss": 0.0008251519188775091, | |
| "value_loss_search": 0.0024292909915487825, | |
| "value_loss_thought": 0.00417192430927571 | |
| }, | |
| { | |
| "epoch": 13.01, | |
| "grad_norm": 0.12826566707799528, | |
| "learning_rate": 2.5213306406402263e-07, | |
| "loss": 0.0133, | |
| "sft_loss": 0.0017561075917910784, | |
| "step": 2180, | |
| "total_loss": 0.0018071690113764306, | |
| "value_loss": 0.0005106141129658682, | |
| "value_loss_search": 0.0019689877941118537, | |
| "value_loss_thought": 0.0021159251197332197 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "grad_norm": 0.1709754478860301, | |
| "learning_rate": 2.445628809841055e-07, | |
| "loss": 0.0114, | |
| "sft_loss": 0.0015144431439694018, | |
| "step": 2185, | |
| "total_loss": 0.001573674503174516, | |
| "value_loss": 0.0005923136392084416, | |
| "value_loss_search": 0.001896365256311583, | |
| "value_loss_thought": 0.0028421438521490927 | |
| }, | |
| { | |
| "epoch": 13.07, | |
| "grad_norm": 0.10695694103625902, | |
| "learning_rate": 2.3710223491787643e-07, | |
| "loss": 0.0115, | |
| "sft_loss": 0.0011580413149204106, | |
| "step": 2190, | |
| "total_loss": 0.0012181214089466152, | |
| "value_loss": 0.0006008008974731638, | |
| "value_loss_search": 0.0022067354780915594, | |
| "value_loss_thought": 0.0025996716371764705 | |
| }, | |
| { | |
| "epoch": 13.1, | |
| "grad_norm": 0.1223481366934227, | |
| "learning_rate": 2.2975148819726844e-07, | |
| "loss": 0.0123, | |
| "sft_loss": 0.0013828211929649114, | |
| "step": 2195, | |
| "total_loss": 0.001459511714705286, | |
| "value_loss": 0.0007669052273740817, | |
| "value_loss_search": 0.0029846215529801155, | |
| "value_loss_thought": 0.0031506202247840063 | |
| }, | |
| { | |
| "epoch": 13.13, | |
| "grad_norm": 0.12035969601588177, | |
| "learning_rate": 2.2251099781686853e-07, | |
| "loss": 0.0118, | |
| "sft_loss": 0.0010231956985080615, | |
| "step": 2200, | |
| "total_loss": 0.0010796000485257195, | |
| "value_loss": 0.0005640435443638126, | |
| "value_loss_search": 0.0022480406605694726, | |
| "value_loss_thought": 0.0022643077172460834 | |
| }, | |
| { | |
| "epoch": 13.16, | |
| "grad_norm": 0.173715525744218, | |
| "learning_rate": 2.1538111541658246e-07, | |
| "loss": 0.0115, | |
| "sft_loss": 0.0012041608191793785, | |
| "step": 2205, | |
| "total_loss": 0.0012736963247903077, | |
| "value_loss": 0.0006953551075753239, | |
| "value_loss_search": 0.0023029572450525395, | |
| "value_loss_thought": 0.0032598836100532933 | |
| }, | |
| { | |
| "epoch": 13.19, | |
| "grad_norm": 0.12560183999239857, | |
| "learning_rate": 2.0836218726455416e-07, | |
| "loss": 0.0116, | |
| "sft_loss": 0.001454232243122533, | |
| "step": 2210, | |
| "total_loss": 0.0015192492540819559, | |
| "value_loss": 0.0006501699334421574, | |
| "value_loss_search": 0.0016968745266581209, | |
| "value_loss_thought": 0.0035044849046244053 | |
| }, | |
| { | |
| "epoch": 13.22, | |
| "grad_norm": 0.15088975343987443, | |
| "learning_rate": 2.0145455424035065e-07, | |
| "loss": 0.0123, | |
| "sft_loss": 0.0014402579807210713, | |
| "step": 2215, | |
| "total_loss": 0.001486383965337268, | |
| "value_loss": 0.00046125984255240837, | |
| "value_loss_search": 0.0020269616570260498, | |
| "value_loss_thought": 0.001663117084353871 | |
| }, | |
| { | |
| "epoch": 13.25, | |
| "grad_norm": 0.1168644695587691, | |
| "learning_rate": 1.9465855181840742e-07, | |
| "loss": 0.0123, | |
| "sft_loss": 0.0016895142354769633, | |
| "step": 2220, | |
| "total_loss": 0.0017325070250297614, | |
| "value_loss": 0.0004299280713553344, | |
| "value_loss_search": 0.0017563865643637655, | |
| "value_loss_thought": 0.0016830380049214 | |
| }, | |
| { | |
| "epoch": 13.28, | |
| "grad_norm": 0.12969058359006697, | |
| "learning_rate": 1.8797451005173384e-07, | |
| "loss": 0.0126, | |
| "sft_loss": 0.0016237141971942037, | |
| "step": 2225, | |
| "total_loss": 0.0016805375176829785, | |
| "value_loss": 0.000568233198055168, | |
| "value_loss_search": 0.0019997224272742644, | |
| "value_loss_thought": 0.002546143160088832 | |
| }, | |
| { | |
| "epoch": 13.31, | |
| "grad_norm": 0.1464573687686116, | |
| "learning_rate": 1.8140275355588682e-07, | |
| "loss": 0.0119, | |
| "sft_loss": 0.0013673121546162292, | |
| "step": 2230, | |
| "total_loss": 0.0014293207376852024, | |
| "value_loss": 0.0006200857808948967, | |
| "value_loss_search": 0.0028769256222062724, | |
| "value_loss_thought": 0.0020837606152554144 | |
| }, | |
| { | |
| "epoch": 13.34, | |
| "grad_norm": 0.13416087510039532, | |
| "learning_rate": 1.749436014932021e-07, | |
| "loss": 0.0131, | |
| "sft_loss": 0.001349821488838643, | |
| "step": 2235, | |
| "total_loss": 0.001453561357747901, | |
| "value_loss": 0.0010373986635158871, | |
| "value_loss_search": 0.003940903465206702, | |
| "value_loss_thought": 0.004358286027468239 | |
| }, | |
| { | |
| "epoch": 13.37, | |
| "grad_norm": 0.12129590354158527, | |
| "learning_rate": 1.68597367557298e-07, | |
| "loss": 0.0122, | |
| "sft_loss": 0.001293862346210517, | |
| "step": 2240, | |
| "total_loss": 0.0013660816371590557, | |
| "value_loss": 0.000722192872768801, | |
| "value_loss_search": 0.002515532513518792, | |
| "value_loss_thought": 0.0032620103815133917 | |
| }, | |
| { | |
| "epoch": 13.4, | |
| "grad_norm": 0.13830951441212483, | |
| "learning_rate": 1.6236435995783644e-07, | |
| "loss": 0.0122, | |
| "sft_loss": 0.0014756130083696916, | |
| "step": 2245, | |
| "total_loss": 0.0015344153451877674, | |
| "value_loss": 0.000588023400473503, | |
| "value_loss_search": 0.0021232158583757155, | |
| "value_loss_thought": 0.0025809713747094063 | |
| }, | |
| { | |
| "epoch": 13.43, | |
| "grad_norm": 0.14177549511437526, | |
| "learning_rate": 1.5624488140555673e-07, | |
| "loss": 0.0122, | |
| "sft_loss": 0.001884979850728996, | |
| "step": 2250, | |
| "total_loss": 0.0019392464295002298, | |
| "value_loss": 0.0005426657894744835, | |
| "value_loss_search": 0.002217422648698175, | |
| "value_loss_thought": 0.0021239036757833675 | |
| }, | |
| { | |
| "epoch": 13.46, | |
| "grad_norm": 0.1357612842710405, | |
| "learning_rate": 1.5023922909757543e-07, | |
| "loss": 0.0116, | |
| "sft_loss": 0.001019277679733932, | |
| "step": 2255, | |
| "total_loss": 0.0010746703279515657, | |
| "value_loss": 0.000553926424036888, | |
| "value_loss_search": 0.0021177719290335515, | |
| "value_loss_thought": 0.002313639441763371 | |
| }, | |
| { | |
| "epoch": 13.49, | |
| "grad_norm": 0.1299080051431472, | |
| "learning_rate": 1.44347694702949e-07, | |
| "loss": 0.0122, | |
| "sft_loss": 0.0013602351624285801, | |
| "step": 2260, | |
| "total_loss": 0.001403163997850676, | |
| "value_loss": 0.0004292882472554993, | |
| "value_loss_search": 0.0016838242217204424, | |
| "value_loss_thought": 0.0017504817547660423 | |
| }, | |
| { | |
| "epoch": 13.52, | |
| "grad_norm": 0.1286701282026923, | |
| "learning_rate": 1.3857056434851301e-07, | |
| "loss": 0.0116, | |
| "sft_loss": 0.0010856040549697354, | |
| "step": 2265, | |
| "total_loss": 0.001160654007901485, | |
| "value_loss": 0.0007504995341719223, | |
| "value_loss_search": 0.002396246173121419, | |
| "value_loss_thought": 0.003607750001219756 | |
| }, | |
| { | |
| "epoch": 13.55, | |
| "grad_norm": 0.14396728510258697, | |
| "learning_rate": 1.3290811860498242e-07, | |
| "loss": 0.0108, | |
| "sft_loss": 0.0011329900531563907, | |
| "step": 2270, | |
| "total_loss": 0.001192187089957031, | |
| "value_loss": 0.0005919702982509989, | |
| "value_loss_search": 0.001987281997116952, | |
| "value_loss_thought": 0.0027484804012374298 | |
| }, | |
| { | |
| "epoch": 13.58, | |
| "grad_norm": 0.11133502723535868, | |
| "learning_rate": 1.273606324733284e-07, | |
| "loss": 0.0119, | |
| "sft_loss": 0.0012918035121401773, | |
| "step": 2275, | |
| "total_loss": 0.0013533333825648697, | |
| "value_loss": 0.0006152988007769977, | |
| "value_loss_search": 0.0025145169366169286, | |
| "value_loss_thought": 0.002407873464107979 | |
| }, | |
| { | |
| "epoch": 13.61, | |
| "grad_norm": 0.12100244913687518, | |
| "learning_rate": 1.2192837537142065e-07, | |
| "loss": 0.0115, | |
| "sft_loss": 0.0015881910978350789, | |
| "step": 2280, | |
| "total_loss": 0.0016699408407248484, | |
| "value_loss": 0.0008174974953490732, | |
| "value_loss_search": 0.0026218705085824467, | |
| "value_loss_thought": 0.003918109554024341 | |
| }, | |
| { | |
| "epoch": 13.64, | |
| "grad_norm": 0.1389992640143122, | |
| "learning_rate": 1.1661161112094421e-07, | |
| "loss": 0.0116, | |
| "sft_loss": 0.0014317982335342095, | |
| "step": 2285, | |
| "total_loss": 0.0014963600385272003, | |
| "value_loss": 0.0006456180733948713, | |
| "value_loss_search": 0.0020672334404252977, | |
| "value_loss_thought": 0.0030977111006450287 | |
| }, | |
| { | |
| "epoch": 13.67, | |
| "grad_norm": 0.1376806904958747, | |
| "learning_rate": 1.1141059793458586e-07, | |
| "loss": 0.0135, | |
| "sft_loss": 0.001531593399704434, | |
| "step": 2290, | |
| "total_loss": 0.0015879040782849074, | |
| "value_loss": 0.0005631069248011045, | |
| "value_loss_search": 0.002264327982754821, | |
| "value_loss_thought": 0.002240527437061246 | |
| }, | |
| { | |
| "epoch": 13.7, | |
| "grad_norm": 0.15507672061398864, | |
| "learning_rate": 1.0632558840349333e-07, | |
| "loss": 0.0127, | |
| "sft_loss": 0.001709194021532312, | |
| "step": 2295, | |
| "total_loss": 0.0017596675465483714, | |
| "value_loss": 0.0005047351388384414, | |
| "value_loss_search": 0.0021137851630555816, | |
| "value_loss_thought": 0.0019240959423768799 | |
| }, | |
| { | |
| "epoch": 13.73, | |
| "grad_norm": 0.13950654690195688, | |
| "learning_rate": 1.0135682948501146e-07, | |
| "loss": 0.011, | |
| "sft_loss": 0.0016408312105340884, | |
| "step": 2300, | |
| "total_loss": 0.0016937724493836016, | |
| "value_loss": 0.0005294122824125225, | |
| "value_loss_search": 0.002247820197192141, | |
| "value_loss_thought": 0.0019874780593738704 | |
| }, | |
| { | |
| "epoch": 13.76, | |
| "grad_norm": 0.14603248892583517, | |
| "learning_rate": 9.650456249068268e-08, | |
| "loss": 0.0125, | |
| "sft_loss": 0.001476616770378314, | |
| "step": 2305, | |
| "total_loss": 0.001532993128402893, | |
| "value_loss": 0.0005637636299979931, | |
| "value_loss_search": 0.0019370973135977465, | |
| "value_loss_thought": 0.0025730117005196007 | |
| }, | |
| { | |
| "epoch": 13.79, | |
| "grad_norm": 0.12093820892491877, | |
| "learning_rate": 9.176902307453328e-08, | |
| "loss": 0.0123, | |
| "sft_loss": 0.0016192490846151486, | |
| "step": 2310, | |
| "total_loss": 0.0016830127960190567, | |
| "value_loss": 0.0006376370715429402, | |
| "value_loss_search": 0.0025056022790977293, | |
| "value_loss_thought": 0.0025954942909493183 | |
| }, | |
| { | |
| "epoch": 13.82, | |
| "grad_norm": 0.13585785284804588, | |
| "learning_rate": 8.715044122162508e-08, | |
| "loss": 0.0127, | |
| "sft_loss": 0.0017002749460516497, | |
| "step": 2315, | |
| "total_loss": 0.0017550808576103805, | |
| "value_loss": 0.0005480592571984744, | |
| "value_loss_search": 0.0021303763422451994, | |
| "value_loss_thought": 0.002254097702507352 | |
| }, | |
| { | |
| "epoch": 13.85, | |
| "grad_norm": 0.14486121709588692, | |
| "learning_rate": 8.264904123688745e-08, | |
| "loss": 0.0119, | |
| "sft_loss": 0.001309369836235419, | |
| "step": 2320, | |
| "total_loss": 0.0013705807045170104, | |
| "value_loss": 0.0006121086411894794, | |
| "value_loss_search": 0.0019892391615371706, | |
| "value_loss_thought": 0.002907629985912763 | |
| }, | |
| { | |
| "epoch": 13.88, | |
| "grad_norm": 0.13756062164141314, | |
| "learning_rate": 7.826504173422372e-08, | |
| "loss": 0.0131, | |
| "sft_loss": 0.0015762085182359441, | |
| "step": 2325, | |
| "total_loss": 0.0016776628372952018, | |
| "value_loss": 0.0010145431685145922, | |
| "value_loss_search": 0.0035052792621627304, | |
| "value_loss_thought": 0.004611065951957016 | |
| }, | |
| { | |
| "epoch": 13.91, | |
| "grad_norm": 0.12390462036780822, | |
| "learning_rate": 7.399865562589315e-08, | |
| "loss": 0.0131, | |
| "sft_loss": 0.0017432003776775673, | |
| "step": 2330, | |
| "total_loss": 0.0018063452240141941, | |
| "value_loss": 0.0006314483902201574, | |
| "value_loss_search": 0.001986952345464488, | |
| "value_loss_thought": 0.003064634781657105 | |
| }, | |
| { | |
| "epoch": 13.94, | |
| "grad_norm": 0.1357904093704358, | |
| "learning_rate": 6.985009011217209e-08, | |
| "loss": 0.0129, | |
| "sft_loss": 0.001526657902286388, | |
| "step": 2335, | |
| "total_loss": 0.0015822424648092692, | |
| "value_loss": 0.0005558455008667807, | |
| "value_loss_search": 0.0019112108780859672, | |
| "value_loss_thought": 0.0025355531076371564 | |
| }, | |
| { | |
| "epoch": 13.96, | |
| "grad_norm": 0.12173848490891062, | |
| "learning_rate": 6.581954667128965e-08, | |
| "loss": 0.0125, | |
| "sft_loss": 0.0019005106441909447, | |
| "step": 2340, | |
| "total_loss": 0.0019535693316782956, | |
| "value_loss": 0.0005305868885898235, | |
| "value_loss_search": 0.0019408738701571337, | |
| "value_loss_thought": 0.0023038212573055715 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "grad_norm": 0.12311131791939463, | |
| "learning_rate": 6.190722104964436e-08, | |
| "loss": 0.0132, | |
| "sft_loss": 0.0012315514002693817, | |
| "step": 2345, | |
| "total_loss": 0.0012908000820957, | |
| "value_loss": 0.0005924867479691898, | |
| "value_loss_search": 0.002214185446916872, | |
| "value_loss_thought": 0.0025257085178168382 | |
| }, | |
| { | |
| "epoch": 14.02, | |
| "grad_norm": 0.11289506914779934, | |
| "learning_rate": 5.811330325229569e-08, | |
| "loss": 0.012, | |
| "sft_loss": 0.0014274256362114101, | |
| "step": 2350, | |
| "total_loss": 0.00148404497326311, | |
| "value_loss": 0.0005661933894430149, | |
| "value_loss_search": 0.0018998756612177204, | |
| "value_loss_thought": 0.0026296714639101994 | |
| }, | |
| { | |
| "epoch": 14.05, | |
| "grad_norm": 0.14032918438561062, | |
| "learning_rate": 5.443797753373864e-08, | |
| "loss": 0.012, | |
| "sft_loss": 0.001353855719207786, | |
| "step": 2355, | |
| "total_loss": 0.0014032512601801273, | |
| "value_loss": 0.0004939554979955574, | |
| "value_loss_search": 0.0016622686555592737, | |
| "value_loss_thought": 0.0022893753313837804 | |
| }, | |
| { | |
| "epoch": 14.08, | |
| "grad_norm": 0.1302537325621518, | |
| "learning_rate": 5.0881422388952275e-08, | |
| "loss": 0.012, | |
| "sft_loss": 0.0009611410961952061, | |
| "step": 2360, | |
| "total_loss": 0.001024993611773084, | |
| "value_loss": 0.0006385252608311021, | |
| "value_loss_search": 0.0032244599921341433, | |
| "value_loss_thought": 0.0018837421045418523 | |
| }, | |
| { | |
| "epoch": 14.11, | |
| "grad_norm": 0.12164242099090507, | |
| "learning_rate": 4.7443810544734456e-08, | |
| "loss": 0.0116, | |
| "sft_loss": 0.0013725335855269804, | |
| "step": 2365, | |
| "total_loss": 0.0014293210130739808, | |
| "value_loss": 0.0005678743107637274, | |
| "value_loss_search": 0.002384104471229875, | |
| "value_loss_thought": 0.00215889002156473 | |
| }, | |
| { | |
| "epoch": 14.14, | |
| "grad_norm": 0.163492619946634, | |
| "learning_rate": 4.412530895131051e-08, | |
| "loss": 0.0115, | |
| "sft_loss": 0.0012715687771560624, | |
| "step": 2370, | |
| "total_loss": 0.001382276511756686, | |
| "value_loss": 0.0011070772634695913, | |
| "value_loss_search": 0.0019688921961233063, | |
| "value_loss_thought": 0.006887725907620279 | |
| }, | |
| { | |
| "epoch": 14.17, | |
| "grad_norm": 0.1481763234968163, | |
| "learning_rate": 4.092607877422578e-08, | |
| "loss": 0.0111, | |
| "sft_loss": 0.0013375403970712796, | |
| "step": 2375, | |
| "total_loss": 0.0014074473611998429, | |
| "value_loss": 0.0006990696618686343, | |
| "value_loss_search": 0.0029466108325550524, | |
| "value_loss_thought": 0.0026459465245352474 | |
| }, | |
| { | |
| "epoch": 14.2, | |
| "grad_norm": 0.1186453351073309, | |
| "learning_rate": 3.784627538652025e-08, | |
| "loss": 0.0112, | |
| "sft_loss": 0.0014332133869174868, | |
| "step": 2380, | |
| "total_loss": 0.0014889124539600117, | |
| "value_loss": 0.0005569907084790771, | |
| "value_loss_search": 0.002353395393970459, | |
| "value_loss_thought": 0.002102530315414697 | |
| }, | |
| { | |
| "epoch": 14.23, | |
| "grad_norm": 0.13825618980337176, | |
| "learning_rate": 3.488604836117987e-08, | |
| "loss": 0.0113, | |
| "sft_loss": 0.0014167566667310893, | |
| "step": 2385, | |
| "total_loss": 0.0014698782767844421, | |
| "value_loss": 0.0005312162420523237, | |
| "value_loss_search": 0.0019338703364951471, | |
| "value_loss_thought": 0.002315859655487884 | |
| }, | |
| { | |
| "epoch": 14.26, | |
| "grad_norm": 0.13774279281432664, | |
| "learning_rate": 3.204554146387456e-08, | |
| "loss": 0.0121, | |
| "sft_loss": 0.0012910763907711953, | |
| "step": 2390, | |
| "total_loss": 0.0013450498239109265, | |
| "value_loss": 0.0005397343376785102, | |
| "value_loss_search": 0.0018899123013738973, | |
| "value_loss_thought": 0.002427962389350569 | |
| }, | |
| { | |
| "epoch": 14.29, | |
| "grad_norm": 0.13075550190644245, | |
| "learning_rate": 2.9324892645975766e-08, | |
| "loss": 0.0125, | |
| "sft_loss": 0.0016997230239212513, | |
| "step": 2395, | |
| "total_loss": 0.0017409329679125563, | |
| "value_loss": 0.0004120994811614764, | |
| "value_loss_search": 0.0016292081562824024, | |
| "value_loss_thought": 0.0016675876831413917 | |
| }, | |
| { | |
| "epoch": 14.32, | |
| "grad_norm": 0.12955374374537487, | |
| "learning_rate": 2.67242340378554e-08, | |
| "loss": 0.013, | |
| "sft_loss": 0.0015193151630228385, | |
| "step": 2400, | |
| "total_loss": 0.0015746116821185653, | |
| "value_loss": 0.0005529651271899638, | |
| "value_loss_search": 0.0023058261356823095, | |
| "value_loss_thought": 0.0021178948942633725 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "grad_norm": 0.13785257512036658, | |
| "learning_rate": 2.4243691942471004e-08, | |
| "loss": 0.0129, | |
| "sft_loss": 0.0013197832508012653, | |
| "step": 2405, | |
| "total_loss": 0.001368108755303865, | |
| "value_loss": 0.00048325501416002226, | |
| "value_loss_search": 0.002034584193029332, | |
| "value_loss_thought": 0.0018314558808242508 | |
| }, | |
| { | |
| "epoch": 14.38, | |
| "grad_norm": 0.1248830545164438, | |
| "learning_rate": 2.1883386829229802e-08, | |
| "loss": 0.0117, | |
| "sft_loss": 0.0015428359998622908, | |
| "step": 2410, | |
| "total_loss": 0.0015903647321067638, | |
| "value_loss": 0.0004752873185424278, | |
| "value_loss_search": 0.0019664257517064245, | |
| "value_loss_thought": 0.0018358727832037403 | |
| }, | |
| { | |
| "epoch": 14.41, | |
| "grad_norm": 0.1284155399497688, | |
| "learning_rate": 1.9643433328139507e-08, | |
| "loss": 0.0129, | |
| "sft_loss": 0.0015311524126445874, | |
| "step": 2415, | |
| "total_loss": 0.0015765634343665625, | |
| "value_loss": 0.0004541101951872406, | |
| "value_loss_search": 0.0018012137584236144, | |
| "value_loss_thought": 0.0018316678463634161 | |
| }, | |
| { | |
| "epoch": 14.44, | |
| "grad_norm": 0.13722313083098495, | |
| "learning_rate": 1.7523940224239422e-08, | |
| "loss": 0.0129, | |
| "sft_loss": 0.0018140839121770114, | |
| "step": 2420, | |
| "total_loss": 0.001872726490603327, | |
| "value_loss": 0.0005864257679718321, | |
| "value_loss_search": 0.0018046398709088863, | |
| "value_loss_thought": 0.002886766302572141 | |
| }, | |
| { | |
| "epoch": 14.47, | |
| "grad_norm": 0.13659337122897194, | |
| "learning_rate": 1.5525010452319966e-08, | |
| "loss": 0.0118, | |
| "sft_loss": 0.001406003290321678, | |
| "step": 2425, | |
| "total_loss": 0.0014668999268792505, | |
| "value_loss": 0.0006089662623253389, | |
| "value_loss_search": 0.0023407790422652398, | |
| "value_loss_thought": 0.0025309510473562114 | |
| }, | |
| { | |
| "epoch": 14.5, | |
| "grad_norm": 0.1630202519081285, | |
| "learning_rate": 1.3646741091920546e-08, | |
| "loss": 0.012, | |
| "sft_loss": 0.0018367367767496035, | |
| "step": 2430, | |
| "total_loss": 0.0018952648396265205, | |
| "value_loss": 0.0005852805467498001, | |
| "value_loss_search": 0.0021444437501230594, | |
| "value_loss_thought": 0.002537800629784215 | |
| }, | |
| { | |
| "epoch": 14.53, | |
| "grad_norm": 0.14649930933179126, | |
| "learning_rate": 1.1889223362616664e-08, | |
| "loss": 0.0137, | |
| "sft_loss": 0.0014180621423292906, | |
| "step": 2435, | |
| "total_loss": 0.0014755390043148964, | |
| "value_loss": 0.0005747684329776348, | |
| "value_loss_search": 0.001847173216401643, | |
| "value_loss_thought": 0.0027509742275924507 | |
| }, | |
| { | |
| "epoch": 14.56, | |
| "grad_norm": 0.12468828325929393, | |
| "learning_rate": 1.0252542619589856e-08, | |
| "loss": 0.0123, | |
| "sft_loss": 0.0015723185992101208, | |
| "step": 2440, | |
| "total_loss": 0.0016369965853844093, | |
| "value_loss": 0.0006467797803679787, | |
| "value_loss_search": 0.0025067227985736905, | |
| "value_loss_thought": 0.0026675153821770436 | |
| }, | |
| { | |
| "epoch": 14.59, | |
| "grad_norm": 0.14711025562960323, | |
| "learning_rate": 8.736778349480723e-09, | |
| "loss": 0.0119, | |
| "sft_loss": 0.0015009303140686824, | |
| "step": 2445, | |
| "total_loss": 0.0015552314092317943, | |
| "value_loss": 0.0005430109402368543, | |
| "value_loss_search": 0.0018667381095156088, | |
| "value_loss_thought": 0.0024773493929330925 | |
| }, | |
| { | |
| "epoch": 14.62, | |
| "grad_norm": 0.1173625792384152, | |
| "learning_rate": 7.3420041665303585e-09, | |
| "loss": 0.0118, | |
| "sft_loss": 0.0015992191329132766, | |
| "step": 2450, | |
| "total_loss": 0.0016605654201441666, | |
| "value_loss": 0.0006134628767767936, | |
| "value_loss_search": 0.002318676908919315, | |
| "value_loss_thought": 0.00258902612285965 | |
| }, | |
| { | |
| "epoch": 14.65, | |
| "grad_norm": 0.13451903100649834, | |
| "learning_rate": 6.068287809004314e-09, | |
| "loss": 0.0121, | |
| "sft_loss": 0.0015388225147034973, | |
| "step": 2455, | |
| "total_loss": 0.0015945610022775724, | |
| "value_loss": 0.0005573847084178851, | |
| "value_loss_search": 0.0020913115042260344, | |
| "value_loss_thought": 0.002367766158249651 | |
| }, | |
| { | |
| "epoch": 14.68, | |
| "grad_norm": 0.1166996348736506, | |
| "learning_rate": 4.915691135903566e-09, | |
| "loss": 0.0115, | |
| "sft_loss": 0.001615592051530257, | |
| "step": 2460, | |
| "total_loss": 0.0016700670589443688, | |
| "value_loss": 0.0005447498988303323, | |
| "value_loss_search": 0.0028115076490621504, | |
| "value_loss_thought": 0.0015464915640791333 | |
| }, | |
| { | |
| "epoch": 14.71, | |
| "grad_norm": 0.1505633716850507, | |
| "learning_rate": 3.884270123959144e-09, | |
| "loss": 0.0114, | |
| "sft_loss": 0.0012577687215525658, | |
| "step": 2465, | |
| "total_loss": 0.0013172450172078242, | |
| "value_loss": 0.0005947628893864021, | |
| "value_loss_search": 0.002692494276323032, | |
| "value_loss_thought": 0.0020656088718169485 | |
| }, | |
| { | |
| "epoch": 14.74, | |
| "grad_norm": 0.1334895803298696, | |
| "learning_rate": 2.9740748649145778e-09, | |
| "loss": 0.0121, | |
| "sft_loss": 0.0015028521651402117, | |
| "step": 2470, | |
| "total_loss": 0.0015599074833644977, | |
| "value_loss": 0.0005705531071839686, | |
| "value_loss_search": 0.0018976363004298946, | |
| "value_loss_thought": 0.0026667885652045696 | |
| }, | |
| { | |
| "epoch": 14.77, | |
| "grad_norm": 0.11676293297773671, | |
| "learning_rate": 2.1851495630928475e-09, | |
| "loss": 0.0121, | |
| "sft_loss": 0.0011438517773058265, | |
| "step": 2475, | |
| "total_loss": 0.001199561754755507, | |
| "value_loss": 0.000557099866716726, | |
| "value_loss_search": 0.002204984583852365, | |
| "value_loss_thought": 0.002251814359776905 | |
| }, | |
| { | |
| "epoch": 14.8, | |
| "grad_norm": 0.12135462746119888, | |
| "learning_rate": 1.5175325332489331e-09, | |
| "loss": 0.0118, | |
| "sft_loss": 0.0013419981114566326, | |
| "step": 2480, | |
| "total_loss": 0.0014050020730792313, | |
| "value_loss": 0.0006300395589619257, | |
| "value_loss_search": 0.002390774656430494, | |
| "value_loss_thought": 0.00264954178167045 | |
| }, | |
| { | |
| "epoch": 14.83, | |
| "grad_norm": 0.12877818502933935, | |
| "learning_rate": 9.712561987104685e-10, | |
| "loss": 0.012, | |
| "sft_loss": 0.001536320144077763, | |
| "step": 2485, | |
| "total_loss": 0.0015917829690181406, | |
| "value_loss": 0.0005546283648982353, | |
| "value_loss_search": 0.0021470139769645515, | |
| "value_loss_thought": 0.0022900129014942648 | |
| }, | |
| { | |
| "epoch": 14.86, | |
| "grad_norm": 0.12977801368527653, | |
| "learning_rate": 5.463470898017798e-10, | |
| "loss": 0.0128, | |
| "sft_loss": 0.00153597031312529, | |
| "step": 2490, | |
| "total_loss": 0.0015952805159344051, | |
| "value_loss": 0.0005931021074616182, | |
| "value_loss_search": 0.002620013221735462, | |
| "value_loss_thought": 0.002124803609603987 | |
| }, | |
| { | |
| "epoch": 14.89, | |
| "grad_norm": 0.10846141171279486, | |
| "learning_rate": 2.4282584255547194e-10, | |
| "loss": 0.0121, | |
| "sft_loss": 0.0014916551124770194, | |
| "step": 2495, | |
| "total_loss": 0.00154097568412368, | |
| "value_loss": 0.0004932056985808231, | |
| "value_loss_search": 0.001677905346400621, | |
| "value_loss_thought": 0.0022677402528643144 | |
| }, | |
| { | |
| "epoch": 14.92, | |
| "grad_norm": 0.1265986974514275, | |
| "learning_rate": 6.070719771156252e-11, | |
| "loss": 0.0123, | |
| "sft_loss": 0.0017506703617982567, | |
| "step": 2500, | |
| "total_loss": 0.0018172925318538091, | |
| "value_loss": 0.0006662217223492916, | |
| "value_loss_search": 0.00269593252516529, | |
| "value_loss_thought": 0.0026338411991446266 | |
| }, | |
| { | |
| "epoch": 14.95, | |
| "grad_norm": 0.12729059230038972, | |
| "learning_rate": 0.0, | |
| "loss": 0.0116, | |
| "sft_loss": 0.0012781478551914915, | |
| "step": 2505, | |
| "total_loss": 0.0013354677166319106, | |
| "value_loss": 0.0005731985727152278, | |
| "value_loss_search": 0.0018533691099037243, | |
| "value_loss_thought": 0.0027322194214775665 | |
| }, | |
| { | |
| "epoch": 14.95, | |
| "step": 2505, | |
| "total_flos": 0.0, | |
| "train_loss": 0.09394951220936404, | |
| "train_runtime": 92418.8162, | |
| "train_samples_per_second": 3.481, | |
| "train_steps_per_second": 0.027 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2505, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 350, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
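
The record above matches the `trainer_state.json` layout that the Hugging Face `Trainer` writes alongside checkpoints: a `log_history` list of per-step entries (here with custom `sft_loss` / `value_loss*` components) followed by a single run-level summary entry. As a minimal sketch of how such a log is typically consumed (an assumption for illustration, not something the file itself prescribes), the snippet below loads the state, skips the final summary record, and plots each per-step loss component on a log scale. The filename `trainer_state.json` and the choice of `matplotlib` are illustrative assumptions.

```python
import json
import matplotlib.pyplot as plt

# Load the trainer state (filename assumed: trainer_state.json is the
# standard name the Hugging Face Trainer emits with each checkpoint).
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step log entries; the final summary record
# (train_runtime, train_loss, ...) lacks the per-component loss keys,
# so filtering on one of them drops it.
logs = [e for e in state["log_history"] if "value_loss" in e]

steps = [e["step"] for e in logs]
for key in ("sft_loss", "value_loss", "value_loss_search", "value_loss_thought"):
    plt.plot(steps, [e[key] for e in logs], label=key)

plt.xlabel("step")
plt.ylabel("loss")
plt.yscale("log")  # the losses fall by ~two orders of magnitude over the run
plt.legend()
plt.title(f"training curves ({state['global_step']} steps, "
          f"{state['num_train_epochs']} epochs)")
plt.show()
```

With this run's data, the plot would show all four components decaying from the ~0.1–1.0 range at the first logged steps down to the ~1e-3 range by step 2505, mirroring the linear learning-rate decay to 0.0 recorded in the final entry.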