{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.94964565460649, "eval_steps": 500, "global_step": 2505, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "grad_norm": 29.208302611647333, "learning_rate": 9.960159362549802e-08, "loss": 1.1515, "sft_loss": 0.1292552625760436, "step": 5, "total_loss": 0.15341808083976502, "value_loss": 0.24162817830219865, "value_loss_search": 0.8858459698676597, "value_loss_thought": 1.0471794582015719 }, { "epoch": 0.06, "grad_norm": 22.329741865045907, "learning_rate": 1.9920318725099604e-07, "loss": 1.2242, "sft_loss": 0.13713806308805943, "step": 10, "total_loss": 0.16430261046643863, "value_loss": 0.27164546787971633, "value_loss_search": 1.0301193907580455, "value_loss_thought": 1.1430443533812649 }, { "epoch": 0.09, "grad_norm": 16.69220166984679, "learning_rate": 2.9880478087649405e-07, "loss": 1.1712, "sft_loss": 0.11755451802164316, "step": 15, "total_loss": 0.1385691657076677, "value_loss": 0.21014647737029007, "value_loss_search": 0.8529300302307092, "value_loss_thought": 0.8282418000162579 }, { "epoch": 0.12, "grad_norm": 15.26729873693678, "learning_rate": 3.9840637450199207e-07, "loss": 1.047, "sft_loss": 0.08705389499664307, "step": 20, "total_loss": 0.10808856324001681, "value_loss": 0.21034667690400966, "value_loss_search": 0.9239509059012562, "value_loss_thought": 0.7588225028477609 }, { "epoch": 0.15, "grad_norm": 16.491728960581845, "learning_rate": 4.9800796812749e-07, "loss": 1.1149, "sft_loss": 0.11774167586117983, "step": 25, "total_loss": 0.13845662841631565, "value_loss": 0.20714952626731248, "value_loss_search": 0.7495332275483975, "value_loss_thought": 0.9076629818649963 }, { "epoch": 0.18, "grad_norm": 13.549454686271, "learning_rate": 5.976095617529881e-07, "loss": 0.883, "sft_loss": 0.083274289034307, "step": 30, "total_loss": 0.10342498328827787, "value_loss": 0.2015069484245032, "value_loss_search": 0.6761670514111756, "value_loss_thought": 0.9358885432491661 }, { "epoch": 0.21, "grad_norm": 13.443693495652768, "learning_rate": 6.972111553784861e-07, "loss": 0.805, "sft_loss": 0.08413450215011835, "step": 35, "total_loss": 0.1025179280739394, "value_loss": 0.18383425649954005, "value_loss_search": 0.5737597634542908, "value_loss_thought": 0.8969142883783207 }, { "epoch": 0.24, "grad_norm": 9.393174013923895, "learning_rate": 7.968127490039841e-07, "loss": 0.7925, "sft_loss": 0.07780555076897144, "step": 40, "total_loss": 0.0958856519588153, "value_loss": 0.18080101079540328, "value_loss_search": 0.6263274765132223, "value_loss_thought": 0.8200806069507962 }, { "epoch": 0.27, "grad_norm": 14.557899943787424, "learning_rate": 8.964143426294822e-07, "loss": 0.6601, "sft_loss": 0.06730504501610994, "step": 45, "total_loss": 0.08439774930666318, "value_loss": 0.17092704317183233, "value_loss_search": 0.6156709093504105, "value_loss_thought": 0.7517454390181229 }, { "epoch": 0.3, "grad_norm": 5.237593181257475, "learning_rate": 9.9601593625498e-07, "loss": 0.5741, "sft_loss": 0.05471886033192277, "step": 50, "total_loss": 0.06898687301945756, "value_loss": 0.14268012425163762, "value_loss_search": 0.5047549274406264, "value_loss_thought": 0.6366860627662391 }, { "epoch": 0.33, "grad_norm": 5.49922083273137, "learning_rate": 1.0956175298804781e-06, "loss": 0.5123, "sft_loss": 0.05361118288710713, "step": 55, "total_loss": 0.0653495688289695, "value_loss": 0.1173838603310287, "value_loss_search": 0.3898644742756964, "value_loss_thought": 0.5492064105579629 }, { "epoch": 0.36, "grad_norm": 4.288203361946918, "learning_rate": 1.1952191235059762e-06, "loss": 0.4635, "sft_loss": 0.03561480939388275, "step": 60, "total_loss": 0.04738654125249013, "value_loss": 0.11771731851040386, "value_loss_search": 0.3933807262365008, "value_loss_thought": 0.5483578289393336 }, { "epoch": 0.39, "grad_norm": 4.058680831426542, "learning_rate": 1.294820717131474e-06, "loss": 0.405, "sft_loss": 0.03836059970781207, "step": 65, "total_loss": 0.049667860931367616, "value_loss": 0.1130726064555347, "value_loss_search": 0.39820473948711876, "value_loss_thought": 0.5063761109966436 }, { "epoch": 0.42, "grad_norm": 6.869902478996511, "learning_rate": 1.3944223107569721e-06, "loss": 0.3733, "sft_loss": 0.03362296093255281, "step": 70, "total_loss": 0.04714309505507117, "value_loss": 0.13520133687416092, "value_loss_search": 0.4016104494468891, "value_loss_thought": 0.6800002471776679 }, { "epoch": 0.45, "grad_norm": 6.311680041987265, "learning_rate": 1.4940239043824702e-06, "loss": 0.3481, "sft_loss": 0.028891393821686505, "step": 75, "total_loss": 0.04102085728627571, "value_loss": 0.12129463239980395, "value_loss_search": 0.4050044394622091, "value_loss_thought": 0.5653526145266369 }, { "epoch": 0.48, "grad_norm": 2.336922175624455, "learning_rate": 1.5936254980079683e-06, "loss": 0.3233, "sft_loss": 0.022906177304685117, "step": 80, "total_loss": 0.034932951488735854, "value_loss": 0.12026773888501338, "value_loss_search": 0.3894530791064881, "value_loss_thought": 0.5726888305682223 }, { "epoch": 0.51, "grad_norm": 5.45038020134611, "learning_rate": 1.6932270916334661e-06, "loss": 0.3587, "sft_loss": 0.028216248843818903, "step": 85, "total_loss": 0.04048567872773674, "value_loss": 0.12269429753323494, "value_loss_search": 0.40813677250607727, "value_loss_thought": 0.5734176081576152 }, { "epoch": 0.54, "grad_norm": 4.373139438548536, "learning_rate": 1.7928286852589644e-06, "loss": 0.322, "sft_loss": 0.030063434736803174, "step": 90, "total_loss": 0.0427513758506393, "value_loss": 0.1268794086528942, "value_loss_search": 0.45016070968977145, "value_loss_thought": 0.5648745556129142 }, { "epoch": 0.57, "grad_norm": 3.3872903271458408, "learning_rate": 1.8924302788844623e-06, "loss": 0.3401, "sft_loss": 0.03258526036515832, "step": 95, "total_loss": 0.04433769078787009, "value_loss": 0.11752430766646285, "value_loss_search": 0.3465800850848808, "value_loss_thought": 0.5936143765953602 }, { "epoch": 0.6, "grad_norm": 2.899664564340515, "learning_rate": 1.99203187250996e-06, "loss": 0.3067, "sft_loss": 0.023941753478720785, "step": 100, "total_loss": 0.03416529792812071, "value_loss": 0.10223544192849658, "value_loss_search": 0.3503992037207354, "value_loss_thought": 0.46748433216125707 }, { "epoch": 0.63, "grad_norm": 5.3644737487705125, "learning_rate": 2.0916334661354584e-06, "loss": 0.3253, "sft_loss": 0.03116959072649479, "step": 105, "total_loss": 0.04235388926063024, "value_loss": 0.1118429817724973, "value_loss_search": 0.3848635824284429, "value_loss_thought": 0.509880276385229 }, { "epoch": 0.66, "grad_norm": 2.807659467395103, "learning_rate": 2.1912350597609563e-06, "loss": 0.3242, "sft_loss": 0.030541227804496883, "step": 110, "total_loss": 0.03971213593467837, "value_loss": 0.09170907911611721, "value_loss_search": 0.2967309238272719, "value_loss_thought": 0.4369417035020888 }, { "epoch": 0.69, "grad_norm": 5.565383438847989, "learning_rate": 2.290836653386454e-06, "loss": 0.2985, "sft_loss": 0.03250002646818757, "step": 115, "total_loss": 0.04239036420258344, "value_loss": 0.09890337522811024, "value_loss_search": 0.3520674262268585, "value_loss_thought": 0.43915957515127957 }, { "epoch": 0.72, "grad_norm": 6.59850948978845, "learning_rate": 2.3904382470119524e-06, "loss": 0.3364, "sft_loss": 0.025409412384033204, "step": 120, "total_loss": 0.03802986370919825, "value_loss": 0.12620451210823375, "value_loss_search": 0.3680036140809534, "value_loss_thought": 0.6416324874851853 }, { "epoch": 0.75, "grad_norm": 5.179904397966752, "learning_rate": 2.4900398406374503e-06, "loss": 0.3036, "sft_loss": 0.029751096572726964, "step": 125, "total_loss": 0.04014970105636166, "value_loss": 0.10398604299989529, "value_loss_search": 0.3222789403582283, "value_loss_thought": 0.5096093997417483 }, { "epoch": 0.78, "grad_norm": 2.7818925064532296, "learning_rate": 2.589641434262948e-06, "loss": 0.3135, "sft_loss": 0.027432794403284787, "step": 130, "total_loss": 0.039190283310017546, "value_loss": 0.1175748852387187, "value_loss_search": 0.379848483775595, "value_loss_thought": 0.5607506038024439 }, { "epoch": 0.81, "grad_norm": 3.1180919559174898, "learning_rate": 2.6892430278884464e-06, "loss": 0.2712, "sft_loss": 0.02562392014078796, "step": 135, "total_loss": 0.037023369785310935, "value_loss": 0.11399449376476696, "value_loss_search": 0.3599409429902153, "value_loss_thought": 0.552015009484603 }, { "epoch": 0.84, "grad_norm": 4.040800077838943, "learning_rate": 2.7888446215139443e-06, "loss": 0.3085, "sft_loss": 0.030298156943172217, "step": 140, "total_loss": 0.040708614706454684, "value_loss": 0.10410457447142107, "value_loss_search": 0.3299579021160753, "value_loss_thought": 0.5028786922775907 }, { "epoch": 0.87, "grad_norm": 3.7542993625502863, "learning_rate": 2.8884462151394426e-06, "loss": 0.2901, "sft_loss": 0.033623593579977754, "step": 145, "total_loss": 0.04418132658774994, "value_loss": 0.10557732760353247, "value_loss_search": 0.3160576841353759, "value_loss_thought": 0.528560933744302 }, { "epoch": 0.9, "grad_norm": 4.460937657390284, "learning_rate": 2.9880478087649404e-06, "loss": 0.2916, "sft_loss": 0.027346841990947723, "step": 150, "total_loss": 0.03589563165332947, "value_loss": 0.08548789632186526, "value_loss_search": 0.2342234575400653, "value_loss_thought": 0.4496797103092831 }, { "epoch": 0.93, "grad_norm": 2.4354922648897808, "learning_rate": 3.0876494023904387e-06, "loss": 0.2869, "sft_loss": 0.0277699186000973, "step": 155, "total_loss": 0.03829326016602863, "value_loss": 0.10523341278894804, "value_loss_search": 0.30548287588171663, "value_loss_thought": 0.536384426720906 }, { "epoch": 0.95, "grad_norm": 7.993819548921568, "learning_rate": 3.1872509960159366e-06, "loss": 0.2799, "sft_loss": 0.028502677148208023, "step": 160, "total_loss": 0.03686514748242189, "value_loss": 0.08362470217980444, "value_loss_search": 0.242223716438275, "value_loss_thought": 0.4267738984548487 }, { "epoch": 0.98, "grad_norm": 3.8893692041440224, "learning_rate": 3.2868525896414344e-06, "loss": 0.271, "sft_loss": 0.03119087303057313, "step": 165, "total_loss": 0.04075200489824056, "value_loss": 0.09561131574337196, "value_loss_search": 0.26875160563213285, "value_loss_thought": 0.4961389156705991 }, { "epoch": 1.01, "grad_norm": 3.77858117117453, "learning_rate": 3.3864541832669323e-06, "loss": 0.2499, "sft_loss": 0.019535421626642346, "step": 170, "total_loss": 0.025970515375956894, "value_loss": 0.06435093678010162, "value_loss_search": 0.20992231852004578, "value_loss_thought": 0.30488517887424677 }, { "epoch": 1.04, "grad_norm": 3.951277466183642, "learning_rate": 3.486055776892431e-06, "loss": 0.2693, "sft_loss": 0.023267556354403494, "step": 175, "total_loss": 0.030257757987055812, "value_loss": 0.0699020166444825, "value_loss_search": 0.21118213117242704, "value_loss_thought": 0.3480340027017519 }, { "epoch": 1.07, "grad_norm": 2.9207350101372365, "learning_rate": 3.585657370517929e-06, "loss": 0.262, "sft_loss": 0.025930221634916963, "step": 180, "total_loss": 0.03511905904761079, "value_loss": 0.09188836992834695, "value_loss_search": 0.26077198034647325, "value_loss_thought": 0.4743349766329629 }, { "epoch": 1.1, "grad_norm": 5.609912691724805, "learning_rate": 3.6852589641434267e-06, "loss": 0.2446, "sft_loss": 0.024912988301366568, "step": 185, "total_loss": 0.03355662206740817, "value_loss": 0.08643633674801095, "value_loss_search": 0.24081130932386258, "value_loss_thought": 0.4506793859531172 }, { "epoch": 1.13, "grad_norm": 3.5622387501649775, "learning_rate": 3.7848605577689246e-06, "loss": 0.2466, "sft_loss": 0.028595651406794785, "step": 190, "total_loss": 0.0361521876320694, "value_loss": 0.07556536267784394, "value_loss_search": 0.22485005248195195, "value_loss_thought": 0.37967284960468534 }, { "epoch": 1.16, "grad_norm": 5.1634897659031616, "learning_rate": 3.884462151394423e-06, "loss": 0.23, "sft_loss": 0.02528013151604682, "step": 195, "total_loss": 0.03363830259149836, "value_loss": 0.08358170948340557, "value_loss_search": 0.25611529394162746, "value_loss_thought": 0.4125383788938052 }, { "epoch": 1.19, "grad_norm": 2.0650765438648104, "learning_rate": 3.98406374501992e-06, "loss": 0.245, "sft_loss": 0.016848266730085014, "step": 200, "total_loss": 0.026103121823689436, "value_loss": 0.09254854982718826, "value_loss_search": 0.3132247169883044, "value_loss_thought": 0.4271636780547851 }, { "epoch": 1.22, "grad_norm": 5.039293863721038, "learning_rate": 4.083665338645419e-06, "loss": 0.2528, "sft_loss": 0.02782264384441078, "step": 205, "total_loss": 0.03652619049535133, "value_loss": 0.08703546431788708, "value_loss_search": 0.24252449526474038, "value_loss_thought": 0.453759221357177 }, { "epoch": 1.25, "grad_norm": 4.004833085846898, "learning_rate": 4.183266932270917e-06, "loss": 0.2485, "sft_loss": 0.03399158006068319, "step": 210, "total_loss": 0.04147085763215728, "value_loss": 0.07479277592756262, "value_loss_search": 0.24146745675992634, "value_loss_thought": 0.3568747464043554 }, { "epoch": 1.28, "grad_norm": 2.2900194027614567, "learning_rate": 4.282868525896415e-06, "loss": 0.2386, "sft_loss": 0.030579356662929057, "step": 215, "total_loss": 0.0377965772360767, "value_loss": 0.07217220571910729, "value_loss_search": 0.23593004742797347, "value_loss_thought": 0.34144759898772464 }, { "epoch": 1.31, "grad_norm": 2.2530806573737077, "learning_rate": 4.382470119521913e-06, "loss": 0.2305, "sft_loss": 0.02292049501556903, "step": 220, "total_loss": 0.028867545309185515, "value_loss": 0.05947050442919135, "value_loss_search": 0.17588968106138053, "value_loss_thought": 0.2998743573494721 }, { "epoch": 1.34, "grad_norm": 2.3848100156605976, "learning_rate": 4.482071713147411e-06, "loss": 0.2488, "sft_loss": 0.021163933374918998, "step": 225, "total_loss": 0.027522947750139793, "value_loss": 0.06359014347035555, "value_loss_search": 0.17336269621831663, "value_loss_thought": 0.33535845117003193 }, { "epoch": 1.37, "grad_norm": 3.155244067655952, "learning_rate": 4.581673306772908e-06, "loss": 0.2318, "sft_loss": 0.021529758046381176, "step": 230, "total_loss": 0.02896902564170887, "value_loss": 0.07439267357622156, "value_loss_search": 0.21162400761529626, "value_loss_thought": 0.3835173849183775 }, { "epoch": 1.4, "grad_norm": 3.8792764757968086, "learning_rate": 4.681274900398407e-06, "loss": 0.2156, "sft_loss": 0.023555860621854663, "step": 235, "total_loss": 0.03241225123529148, "value_loss": 0.08856390380533412, "value_loss_search": 0.24428551671026072, "value_loss_thought": 0.4642257096977119 }, { "epoch": 1.43, "grad_norm": 3.4958134022432183, "learning_rate": 4.780876494023905e-06, "loss": 0.238, "sft_loss": 0.024119808338582516, "step": 240, "total_loss": 0.02983194561311393, "value_loss": 0.05712137209266075, "value_loss_search": 0.17349829038121242, "value_loss_thought": 0.28347268382058244 }, { "epoch": 1.46, "grad_norm": 2.3555543700333303, "learning_rate": 4.880478087649403e-06, "loss": 0.2381, "sft_loss": 0.02051589481998235, "step": 245, "total_loss": 0.02669115279750258, "value_loss": 0.0617525799851137, "value_loss_search": 0.1842656165907101, "value_loss_thought": 0.3097550194659561 }, { "epoch": 1.49, "grad_norm": 7.0740358168657576, "learning_rate": 4.980079681274901e-06, "loss": 0.2514, "sft_loss": 0.02382271084934473, "step": 250, "total_loss": 0.031842488735128426, "value_loss": 0.08019777825520577, "value_loss_search": 0.2615329277869023, "value_loss_thought": 0.38004929782619 }, { "epoch": 1.52, "grad_norm": 2.299103577467398, "learning_rate": 4.9999611473368576e-06, "loss": 0.2199, "sft_loss": 0.02000407627783716, "step": 255, "total_loss": 0.025701836839834867, "value_loss": 0.05697760357270454, "value_loss_search": 0.14788065557549998, "value_loss_thought": 0.3079401723291085 }, { "epoch": 1.55, "grad_norm": 1.6807413310213553, "learning_rate": 4.999803310462543e-06, "loss": 0.2232, "sft_loss": 0.025435299216769635, "step": 260, "total_loss": 0.033880134870196345, "value_loss": 0.08444835591653828, "value_loss_search": 0.2635558438050339, "value_loss_thought": 0.4120309994032141 }, { "epoch": 1.58, "grad_norm": 3.984061493281701, "learning_rate": 4.999524068745182e-06, "loss": 0.2248, "sft_loss": 0.022567298170179127, "step": 265, "total_loss": 0.02977634134258551, "value_loss": 0.07209043111970459, "value_loss_search": 0.22659471631785663, "value_loss_thought": 0.35012873328050775 }, { "epoch": 1.61, "grad_norm": 5.581660893537601, "learning_rate": 4.99912343574636e-06, "loss": 0.2457, "sft_loss": 0.02686069840565324, "step": 270, "total_loss": 0.033531226357308694, "value_loss": 0.06670527966834924, "value_loss_search": 0.21075203863763364, "value_loss_thought": 0.3228901971851883 }, { "epoch": 1.64, "grad_norm": 4.189848962908778, "learning_rate": 4.998601430923122e-06, "loss": 0.2437, "sft_loss": 0.025497494312003256, "step": 275, "total_loss": 0.031059039360116003, "value_loss": 0.0556154465644795, "value_loss_search": 0.16651217907464116, "value_loss_thought": 0.27841138996300285 }, { "epoch": 1.67, "grad_norm": 1.2840019127815288, "learning_rate": 4.997958079627029e-06, "loss": 0.2337, "sft_loss": 0.02229512729682028, "step": 280, "total_loss": 0.02872599834945504, "value_loss": 0.06430871063357699, "value_loss_search": 0.15088917155694617, "value_loss_thought": 0.3635805150745 }, { "epoch": 1.7, "grad_norm": 1.9906954863509052, "learning_rate": 4.997193413102923e-06, "loss": 0.2358, "sft_loss": 0.030755233392119406, "step": 285, "total_loss": 0.03727109958572328, "value_loss": 0.06515866482732235, "value_loss_search": 0.19967410092722276, "value_loss_thought": 0.3215952147511416 }, { "epoch": 1.73, "grad_norm": 2.0542643472869706, "learning_rate": 4.996307468487414e-06, "loss": 0.222, "sft_loss": 0.02293794075958431, "step": 290, "total_loss": 0.027633006192809263, "value_loss": 0.04695065361320303, "value_loss_search": 0.1386001783932443, "value_loss_thought": 0.23700505015585804 }, { "epoch": 1.76, "grad_norm": 2.1566271207415375, "learning_rate": 4.995300288807075e-06, "loss": 0.2254, "sft_loss": 0.024227931816130876, "step": 295, "total_loss": 0.0282421637364223, "value_loss": 0.04014231975597795, "value_loss_search": 0.13643495284136406, "value_loss_thought": 0.18470360402425284 }, { "epoch": 1.79, "grad_norm": 1.780628989439151, "learning_rate": 4.994171922976349e-06, "loss": 0.226, "sft_loss": 0.019921383424662053, "step": 300, "total_loss": 0.02442953032641526, "value_loss": 0.045081469729302624, "value_loss_search": 0.1360888234203003, "value_loss_thought": 0.22456293730974722 }, { "epoch": 1.82, "grad_norm": 2.350607243352896, "learning_rate": 4.992922425795179e-06, "loss": 0.2152, "sft_loss": 0.021342089958488943, "step": 305, "total_loss": 0.02731037002786252, "value_loss": 0.059682800685004624, "value_loss_search": 0.14473845993270515, "value_loss_thought": 0.33272394547384465 }, { "epoch": 1.85, "grad_norm": 2.476250174714291, "learning_rate": 4.991551857946343e-06, "loss": 0.2167, "sft_loss": 0.024368287762627006, "step": 310, "total_loss": 0.02891062788005456, "value_loss": 0.04542339965519204, "value_loss_search": 0.13215827879938616, "value_loss_thought": 0.23122892067385692 }, { "epoch": 1.88, "grad_norm": 2.1211906530083304, "learning_rate": 4.990060285992507e-06, "loss": 0.2267, "sft_loss": 0.03224845631048083, "step": 315, "total_loss": 0.0367003864619619, "value_loss": 0.044519302164553666, "value_loss_search": 0.13228539347510376, "value_loss_thought": 0.22386902507196282 }, { "epoch": 1.91, "grad_norm": 4.051009811709579, "learning_rate": 4.988447782372996e-06, "loss": 0.2227, "sft_loss": 0.015061728050932288, "step": 320, "total_loss": 0.020883486873708536, "value_loss": 0.0582175869083585, "value_loss_search": 0.18025588636555767, "value_loss_thought": 0.28548481071411513 }, { "epoch": 1.94, "grad_norm": 6.185126993035625, "learning_rate": 4.986714425400269e-06, "loss": 0.2242, "sft_loss": 0.02483751201070845, "step": 325, "total_loss": 0.029222130161724636, "value_loss": 0.043846180192485915, "value_loss_search": 0.126026460820583, "value_loss_thought": 0.22474298011511565 }, { "epoch": 1.97, "grad_norm": 1.2748032727970502, "learning_rate": 4.98486029925612e-06, "loss": 0.2113, "sft_loss": 0.022788235195912422, "step": 330, "total_loss": 0.027279667726634215, "value_loss": 0.04491432422255457, "value_loss_search": 0.14298201398346463, "value_loss_thought": 0.21633257850262452 }, { "epoch": 2.0, "grad_norm": 1.552484041484157, "learning_rate": 4.982885493987595e-06, "loss": 0.2124, "sft_loss": 0.02811491028405726, "step": 335, "total_loss": 0.032856917950812206, "value_loss": 0.04742007501154148, "value_loss_search": 0.15364139593775689, "value_loss_thought": 0.22571920259069883 }, { "epoch": 2.03, "grad_norm": 2.7785428382948316, "learning_rate": 4.9807901055026054e-06, "loss": 0.1788, "sft_loss": 0.018437814386561512, "step": 340, "total_loss": 0.02146597366839842, "value_loss": 0.03028159231407699, "value_loss_search": 0.10903610251207282, "value_loss_thought": 0.13321663648093818 }, { "epoch": 2.06, "grad_norm": 1.2850611180768896, "learning_rate": 4.978574235565284e-06, "loss": 0.1788, "sft_loss": 0.023076185397803782, "step": 345, "total_loss": 0.027342022850689318, "value_loss": 0.04265837599887164, "value_loss_search": 0.13071881325704454, "value_loss_thought": 0.21054819686614792 }, { "epoch": 2.09, "grad_norm": 1.3911394308456917, "learning_rate": 4.976237991791033e-06, "loss": 0.1719, "sft_loss": 0.019436489534564318, "step": 350, "total_loss": 0.023576461341781395, "value_loss": 0.041399715623504106, "value_loss_search": 0.1492293958552068, "value_loss_thought": 0.18196833048205008 }, { "epoch": 2.12, "grad_norm": 1.6481312891558262, "learning_rate": 4.973781487641303e-06, "loss": 0.1844, "sft_loss": 0.025559269287623466, "step": 355, "total_loss": 0.02877166439211578, "value_loss": 0.032123952108486266, "value_loss_search": 0.11874442657458531, "value_loss_thought": 0.13824718931678034 }, { "epoch": 2.15, "grad_norm": 1.103781032562002, "learning_rate": 4.9712048424180806e-06, "loss": 0.1862, "sft_loss": 0.02381216634530574, "step": 360, "total_loss": 0.02632210613701318, "value_loss": 0.025099397322048845, "value_loss_search": 0.08361520563119029, "value_loss_thought": 0.11717997152591124 }, { "epoch": 2.18, "grad_norm": 2.0386255202501173, "learning_rate": 4.968508181258093e-06, "loss": 0.1637, "sft_loss": 0.01670523874927312, "step": 365, "total_loss": 0.019544158500229968, "value_loss": 0.028389195861564076, "value_loss_search": 0.08506083722818403, "value_loss_thought": 0.14205273159604986 }, { "epoch": 2.21, "grad_norm": 1.7600289408433292, "learning_rate": 4.965691635126737e-06, "loss": 0.1816, "sft_loss": 0.0221649584826082, "step": 370, "total_loss": 0.02530371010229828, "value_loss": 0.03138751635051449, "value_loss_search": 0.099762270176916, "value_loss_thought": 0.1513378613846726 }, { "epoch": 2.24, "grad_norm": 1.2580654310157564, "learning_rate": 4.962755340811709e-06, "loss": 0.179, "sft_loss": 0.01950194430537522, "step": 375, "total_loss": 0.022580356493926958, "value_loss": 0.03078412101158392, "value_loss_search": 0.09001722024981973, "value_loss_thought": 0.15625574861769564 }, { "epoch": 2.27, "grad_norm": 2.234333438325428, "learning_rate": 4.959699440916369e-06, "loss": 0.1749, "sft_loss": 0.022818994731642304, "step": 380, "total_loss": 0.025907937735428276, "value_loss": 0.030889428492628213, "value_loss_search": 0.08153837720744832, "value_loss_thought": 0.16557704737147105 }, { "epoch": 2.3, "grad_norm": 1.085143168299747, "learning_rate": 4.956524083852812e-06, "loss": 0.1667, "sft_loss": 0.01802813063841313, "step": 385, "total_loss": 0.020953079688115394, "value_loss": 0.029249489003632334, "value_loss_search": 0.09198170516392565, "value_loss_thought": 0.14201420772205892 }, { "epoch": 2.33, "grad_norm": 2.242763660758448, "learning_rate": 4.953229423834662e-06, "loss": 0.1818, "sft_loss": 0.011760137742385268, "step": 390, "total_loss": 0.015489974882802926, "value_loss": 0.03729837100072473, "value_loss_search": 0.08624583505323927, "value_loss_thought": 0.21214113264650222 }, { "epoch": 2.36, "grad_norm": 0.8938036920537223, "learning_rate": 4.949815620869579e-06, "loss": 0.1824, "sft_loss": 0.02469517719000578, "step": 395, "total_loss": 0.0275583725127035, "value_loss": 0.02863195287209237, "value_loss_search": 0.08686879273809608, "value_loss_thought": 0.1421868310884747 }, { "epoch": 2.39, "grad_norm": 0.7973027480525212, "learning_rate": 4.946282840751494e-06, "loss": 0.1769, "sft_loss": 0.018014212045818566, "step": 400, "total_loss": 0.021850849609819534, "value_loss": 0.03836637459389749, "value_loss_search": 0.07837754817135192, "value_loss_thought": 0.22855344959643845 }, { "epoch": 2.42, "grad_norm": 1.2529435303540792, "learning_rate": 4.942631255052551e-06, "loss": 0.1728, "sft_loss": 0.015492680622264743, "step": 405, "total_loss": 0.018379339482680734, "value_loss": 0.028866587373761375, "value_loss_search": 0.09674510946805412, "value_loss_thought": 0.1341875916292338 }, { "epoch": 2.45, "grad_norm": 0.7134673687996919, "learning_rate": 4.938861041114779e-06, "loss": 0.1664, "sft_loss": 0.01866905202623457, "step": 410, "total_loss": 0.020626170139428267, "value_loss": 0.019571180698403624, "value_loss_search": 0.06965761288097383, "value_loss_thought": 0.08691183333139633 }, { "epoch": 2.48, "grad_norm": 0.7529741551559901, "learning_rate": 4.934972382041475e-06, "loss": 0.1849, "sft_loss": 0.019827575120143593, "step": 415, "total_loss": 0.022788787103309005, "value_loss": 0.029612120029923972, "value_loss_search": 0.0888989626518196, "value_loss_thought": 0.1479980006653932 }, { "epoch": 2.51, "grad_norm": 1.7995496682865593, "learning_rate": 4.9309654666883165e-06, "loss": 0.179, "sft_loss": 0.020990492962300777, "step": 420, "total_loss": 0.024445655822250955, "value_loss": 0.03455162828067841, "value_loss_search": 0.08684888708735343, "value_loss_thought": 0.18956413638661615 }, { "epoch": 2.54, "grad_norm": 1.670690442471347, "learning_rate": 4.926840489654184e-06, "loss": 0.1894, "sft_loss": 0.02077859474811703, "step": 425, "total_loss": 0.024163767833670136, "value_loss": 0.033851731998584, "value_loss_search": 0.08538484702294227, "value_loss_thought": 0.18542900827596895 }, { "epoch": 2.57, "grad_norm": 1.8704656616088737, "learning_rate": 4.922597651271716e-06, "loss": 0.1927, "sft_loss": 0.02478037038818002, "step": 430, "total_loss": 0.02731952823942265, "value_loss": 0.025391577205482463, "value_loss_search": 0.08109033311634448, "value_loss_thought": 0.12204228106857044 }, { "epoch": 2.6, "grad_norm": 1.3917455631379416, "learning_rate": 4.918237157597574e-06, "loss": 0.1792, "sft_loss": 0.01933148135431111, "step": 435, "total_loss": 0.022168212975520872, "value_loss": 0.02836731561783381, "value_loss_search": 0.0816025677968355, "value_loss_thought": 0.1453359603422541 }, { "epoch": 2.63, "grad_norm": 1.0019606814299877, "learning_rate": 4.913759220402441e-06, "loss": 0.1911, "sft_loss": 0.019400009652599692, "step": 440, "total_loss": 0.02239533064052921, "value_loss": 0.029953211315478255, "value_loss_search": 0.10217987555329273, "value_loss_thought": 0.13744581426071817 }, { "epoch": 2.66, "grad_norm": 1.2317965960561872, "learning_rate": 4.9091640571607295e-06, "loss": 0.1827, "sft_loss": 0.016407015430741013, "step": 445, "total_loss": 0.018946515550851473, "value_loss": 0.0253950018544856, "value_loss_search": 0.07369275801845561, "value_loss_thought": 0.12946725602414516 }, { "epoch": 2.69, "grad_norm": 0.683140754700333, "learning_rate": 4.9044518910400285e-06, "loss": 0.1676, "sft_loss": 0.012768425536341966, "step": 450, "total_loss": 0.014526197344821412, "value_loss": 0.017577718090615237, "value_loss_search": 0.04510120849572559, "value_loss_thought": 0.09552053666211577 }, { "epoch": 2.72, "grad_norm": 1.0907748085551225, "learning_rate": 4.899622950890258e-06, "loss": 0.1837, "sft_loss": 0.017968191439285873, "step": 455, "total_loss": 0.02001593358502305, "value_loss": 0.020477420029783387, "value_loss_search": 0.04958656680909144, "value_loss_thought": 0.11423279177743098 }, { "epoch": 2.75, "grad_norm": 1.3850337152675154, "learning_rate": 4.894677471232556e-06, "loss": 0.1908, "sft_loss": 0.01755859658587724, "step": 460, "total_loss": 0.02234408485255699, "value_loss": 0.04785487991980517, "value_loss_search": 0.1180493140761314, "value_loss_thought": 0.26478972275981505 }, { "epoch": 2.78, "grad_norm": 1.1181978133661836, "learning_rate": 4.889615692247893e-06, "loss": 0.1841, "sft_loss": 0.021812843438237906, "step": 465, "total_loss": 0.025394389864322874, "value_loss": 0.035815464633651574, "value_loss_search": 0.09992547294255019, "value_loss_thought": 0.18659824333662983 }, { "epoch": 2.8, "grad_norm": 2.0759753145829025, "learning_rate": 4.884437859765403e-06, "loss": 0.1824, "sft_loss": 0.026800552336499094, "step": 470, "total_loss": 0.030636231325843255, "value_loss": 0.03835678935120086, "value_loss_search": 0.08862242994021016, "value_loss_thought": 0.2182318838626088 }, { "epoch": 2.83, "grad_norm": 1.4431529345376723, "learning_rate": 4.879144225250445e-06, "loss": 0.1816, "sft_loss": 0.017374001140706242, "step": 475, "total_loss": 0.01985499007805629, "value_loss": 0.02480988986644661, "value_loss_search": 0.06347666834494703, "value_loss_thought": 0.1350024510633375 }, { "epoch": 2.86, "grad_norm": 1.4247925282572933, "learning_rate": 4.873735045792395e-06, "loss": 0.1818, "sft_loss": 0.01573096849024296, "step": 480, "total_loss": 0.018171964960481547, "value_loss": 0.02440996536379316, "value_loss_search": 0.06245305103024066, "value_loss_thought": 0.13282667167441103 }, { "epoch": 2.89, "grad_norm": 0.9971468267328366, "learning_rate": 4.868210584092151e-06, "loss": 0.1467, "sft_loss": 0.014233330194838346, "step": 485, "total_loss": 0.017078536024837377, "value_loss": 0.028452056018250006, "value_loss_search": 0.08309140489452602, "value_loss_thought": 0.14452504066284746 }, { "epoch": 2.92, "grad_norm": 1.023944906300116, "learning_rate": 4.862571108449387e-06, "loss": 0.1698, "sft_loss": 0.015431807213462889, "step": 490, "total_loss": 0.01847703835471748, "value_loss": 0.030452309869269813, "value_loss_search": 0.068971808032029, "value_loss_thought": 0.1746466698442873 }, { "epoch": 2.95, "grad_norm": 2.461096472197934, "learning_rate": 4.856816892749512e-06, "loss": 0.1971, "sft_loss": 0.023769452376291154, "step": 495, "total_loss": 0.02749020796882178, "value_loss": 0.03720755761728469, "value_loss_search": 0.09986964019751668, "value_loss_thought": 0.19779082740396917 }, { "epoch": 2.98, "grad_norm": 3.0866796216704744, "learning_rate": 4.850948216450374e-06, "loss": 0.1685, "sft_loss": 0.014484391221776605, "step": 500, "total_loss": 0.017671175975374355, "value_loss": 0.03186784757053829, "value_loss_search": 0.08305875552287034, "value_loss_thought": 0.17188402622778085 }, { "epoch": 3.01, "grad_norm": 1.5870923337393499, "learning_rate": 4.844965364568688e-06, "loss": 0.1597, "sft_loss": 0.012694860575720668, "step": 505, "total_loss": 0.014846146440197572, "value_loss": 0.021512858869391492, "value_loss_search": 0.0868586509443503, "value_loss_thought": 0.08524421998045 }, { "epoch": 3.04, "grad_norm": 1.2125544575036498, "learning_rate": 4.838868627666191e-06, "loss": 0.1309, "sft_loss": 0.0139304670272395, "step": 510, "total_loss": 0.015479093678698064, "value_loss": 0.01548626560065145, "value_loss_search": 0.04746301869455181, "value_loss_thought": 0.0764271073458076 }, { "epoch": 3.07, "grad_norm": 2.334834842181477, "learning_rate": 4.8326583018355325e-06, "loss": 0.1387, "sft_loss": 0.017326009750831872, "step": 515, "total_loss": 0.018803953268570695, "value_loss": 0.014779433516605423, "value_loss_search": 0.04713020703025848, "value_loss_thought": 0.07110526093983935 }, { "epoch": 3.1, "grad_norm": 1.5097098181886008, "learning_rate": 4.826334688685895e-06, "loss": 0.1436, "sft_loss": 0.015178992017172276, "step": 520, "total_loss": 0.01744569096995292, "value_loss": 0.022666989314620876, "value_loss_search": 0.056544199488735104, "value_loss_thought": 0.12479171633535771 }, { "epoch": 3.13, "grad_norm": 1.3535205448949577, "learning_rate": 4.819898095328346e-06, "loss": 0.1298, "sft_loss": 0.012230370636098086, "step": 525, "total_loss": 0.013584458573981805, "value_loss": 0.013540879723450416, "value_loss_search": 0.05891674056322245, "value_loss_thought": 0.04941029600959155 }, { "epoch": 3.16, "grad_norm": 1.3125271270813312, "learning_rate": 4.8133488343609225e-06, "loss": 0.1235, "sft_loss": 0.011256256676279008, "step": 530, "total_loss": 0.01235277638277239, "value_loss": 0.010965196802611387, "value_loss_search": 0.03379480599472799, "value_loss_thought": 0.05392676859810308 }, { "epoch": 3.19, "grad_norm": 1.2773968571118655, "learning_rate": 4.8066872238534475e-06, "loss": 0.1315, "sft_loss": 0.015837551723234356, "step": 535, "total_loss": 0.01701334770821745, "value_loss": 0.011757960020167957, "value_loss_search": 0.04275373071531021, "value_loss_thought": 0.05130994958924475 }, { "epoch": 3.22, "grad_norm": 2.250161610850135, "learning_rate": 4.799913587332087e-06, "loss": 0.1336, "sft_loss": 0.01434246387798339, "step": 540, "total_loss": 0.017333269701339304, "value_loss": 0.02990805795383835, "value_loss_search": 0.1112344439959088, "value_loss_thought": 0.1280300162234198 }, { "epoch": 3.25, "grad_norm": 1.7005049809514607, "learning_rate": 4.793028253763633e-06, "loss": 0.1382, "sft_loss": 0.017506125732325017, "step": 545, "total_loss": 0.018909330308576954, "value_loss": 0.014032045881458543, "value_loss_search": 0.04037052274323969, "value_loss_thought": 0.0718858446669401 }, { "epoch": 3.28, "grad_norm": 0.7583090366866635, "learning_rate": 4.786031557539532e-06, "loss": 0.1302, "sft_loss": 0.01899501702282578, "step": 550, "total_loss": 0.020867432708973864, "value_loss": 0.018724157162853316, "value_loss_search": 0.0525303708315505, "value_loss_thought": 0.09726288783901964 }, { "epoch": 3.31, "grad_norm": 0.6780107958188984, "learning_rate": 4.7789238384596394e-06, "loss": 0.1332, "sft_loss": 0.01216251152800396, "step": 555, "total_loss": 0.013824499693544112, "value_loss": 0.016619880075199944, "value_loss_search": 0.043466050122447086, "value_loss_thought": 0.08949299015621363 }, { "epoch": 3.34, "grad_norm": 1.186243776959505, "learning_rate": 4.771705441715722e-06, "loss": 0.1337, "sft_loss": 0.012400240125134587, "step": 560, "total_loss": 0.015292362061109089, "value_loss": 0.02892121910044807, "value_loss_search": 0.07066335178838017, "value_loss_thought": 0.160706402354117 }, { "epoch": 3.37, "grad_norm": 0.8074975390166714, "learning_rate": 4.76437671787469e-06, "loss": 0.1276, "sft_loss": 0.013388467975892127, "step": 565, "total_loss": 0.014532541646792652, "value_loss": 0.011440736869690226, "value_loss_search": 0.042682257217265374, "value_loss_thought": 0.04884363801356813 }, { "epoch": 3.4, "grad_norm": 1.1536407465687832, "learning_rate": 4.756938022861575e-06, "loss": 0.1361, "sft_loss": 0.014692733390256762, "step": 570, "total_loss": 0.016363070430702464, "value_loss": 0.01670336974376596, "value_loss_search": 0.05988950296840585, "value_loss_thought": 0.07373745448749106 }, { "epoch": 3.43, "grad_norm": 1.296361708786197, "learning_rate": 4.7493897179422366e-06, "loss": 0.1262, "sft_loss": 0.014538243343122304, "step": 575, "total_loss": 0.016324132579029538, "value_loss": 0.017858891680498347, "value_loss_search": 0.043000358385506844, "value_loss_thought": 0.09987077550104004 }, { "epoch": 3.46, "grad_norm": 0.7473511902604624, "learning_rate": 4.741732169705829e-06, "loss": 0.128, "sft_loss": 0.01885277854744345, "step": 580, "total_loss": 0.020614391867351856, "value_loss": 0.01761613485025464, "value_loss_search": 0.07391770112133145, "value_loss_thought": 0.06701137741147249 }, { "epoch": 3.49, "grad_norm": 0.8672128287076202, "learning_rate": 4.733965750046987e-06, "loss": 0.1306, "sft_loss": 0.011551224719733, "step": 585, "total_loss": 0.012892319760794635, "value_loss": 0.013410949158787844, "value_loss_search": 0.044143242872939935, "value_loss_thought": 0.06314435001149832 }, { "epoch": 3.52, "grad_norm": 1.1458177098839266, "learning_rate": 4.72609083614777e-06, "loss": 0.1292, "sft_loss": 0.014341436792165041, "step": 590, "total_loss": 0.015776935562007564, "value_loss": 0.014354987322531088, "value_loss_search": 0.05364567942300482, "value_loss_thought": 0.06119421870671431 }, { "epoch": 3.55, "grad_norm": 0.9268770278205384, "learning_rate": 4.71810781045934e-06, "loss": 0.1297, "sft_loss": 0.015257505606859923, "step": 595, "total_loss": 0.017370533025075473, "value_loss": 0.02113027337927633, "value_loss_search": 0.059170720773727224, "value_loss_thought": 0.1098714638722413 }, { "epoch": 3.58, "grad_norm": 0.868085256277253, "learning_rate": 4.710017060683396e-06, "loss": 0.1425, "sft_loss": 0.01423517488874495, "step": 600, "total_loss": 0.01731622647575932, "value_loss": 0.03081051432359345, "value_loss_search": 0.0980118739993486, "value_loss_thought": 0.14847224191835268 }, { "epoch": 3.61, "grad_norm": 1.0537209668511847, "learning_rate": 4.701818979753331e-06, "loss": 0.1321, "sft_loss": 0.014892896311357618, "step": 605, "total_loss": 0.016185236236145782, "value_loss": 0.012923399927421997, "value_loss_search": 0.04884319881856527, "value_loss_thought": 0.05454400067619645 }, { "epoch": 3.64, "grad_norm": 0.8700570072330364, "learning_rate": 4.693513965815163e-06, "loss": 0.1508, "sft_loss": 0.019580099708400668, "step": 610, "total_loss": 0.02092106218860863, "value_loss": 0.013409624124460607, "value_loss_search": 0.04490554911869822, "value_loss_thought": 0.06237144449660263 }, { "epoch": 3.67, "grad_norm": 1.2788774174216047, "learning_rate": 4.6851024222081905e-06, "loss": 0.1375, "sft_loss": 0.014192894287407398, "step": 615, "total_loss": 0.015467491799489608, "value_loss": 0.012745974849167396, "value_loss_search": 0.05650571903670425, "value_loss_thought": 0.045462077876436524 }, { "epoch": 3.7, "grad_norm": 1.1384751682462817, "learning_rate": 4.676584757445406e-06, "loss": 0.1391, "sft_loss": 0.01579129050951451, "step": 620, "total_loss": 0.017925446468007068, "value_loss": 0.021341558844778775, "value_loss_search": 0.04930673633061815, "value_loss_thought": 0.12142573465496298 }, { "epoch": 3.73, "grad_norm": 0.9386975353990433, "learning_rate": 4.667961385193656e-06, "loss": 0.1358, "sft_loss": 0.01762553579173982, "step": 625, "total_loss": 0.01908148711366948, "value_loss": 0.014559512600328617, "value_loss_search": 0.0564095867106289, "value_loss_thought": 0.06006651317602518 }, { "epoch": 3.76, "grad_norm": 1.09540391640531, "learning_rate": 4.659232724253553e-06, "loss": 0.1426, "sft_loss": 0.012116698501631617, "step": 630, "total_loss": 0.013968724870528604, "value_loss": 0.01852026394344648, "value_loss_search": 0.037771890809654, "value_loss_thought": 0.11039022141931128 }, { "epoch": 3.79, "grad_norm": 1.1403346821597187, "learning_rate": 4.650399198539132e-06, "loss": 0.14, "sft_loss": 0.017072572163306175, "step": 635, "total_loss": 0.018839041521141554, "value_loss": 0.017664692708285655, "value_loss_search": 0.0408900124222555, "value_loss_thought": 0.10042752822591865 }, { "epoch": 3.82, "grad_norm": 0.8573467809405049, "learning_rate": 4.641461237057267e-06, "loss": 0.1402, "sft_loss": 0.01425269797910005, "step": 640, "total_loss": 0.01625731455909545, "value_loss": 0.020046164246105037, "value_loss_search": 0.06718737064234119, "value_loss_thought": 0.09318194256302377 }, { "epoch": 3.85, "grad_norm": 0.6970362791399626, "learning_rate": 4.632419273886835e-06, "loss": 0.1257, "sft_loss": 0.013124658446758985, "step": 645, "total_loss": 0.014949945608714188, "value_loss": 0.01825287001252036, "value_loss_search": 0.047629779761018655, "value_loss_thought": 0.09839317841679077 }, { "epoch": 3.88, "grad_norm": 1.0570192724917926, "learning_rate": 4.62327374815763e-06, "loss": 0.1318, "sft_loss": 0.014157434459775686, "step": 650, "total_loss": 0.016517047015622666, "value_loss": 0.023596125619087617, "value_loss_search": 0.06047542081159918, "value_loss_thought": 0.12829358246017364 }, { "epoch": 3.91, "grad_norm": 1.4519497262292758, "learning_rate": 4.614025104029046e-06, "loss": 0.1304, "sft_loss": 0.015446552366483957, "step": 655, "total_loss": 0.017752421715067614, "value_loss": 0.02305869280771731, "value_loss_search": 0.05821381665200533, "value_loss_thought": 0.12625572479846597 }, { "epoch": 3.94, "grad_norm": 1.4038621587755458, "learning_rate": 4.6046737906684955e-06, "loss": 0.1222, "sft_loss": 0.013910213205963372, "step": 660, "total_loss": 0.016714950121104265, "value_loss": 0.028047367616863994, "value_loss_search": 0.07310631623822701, "value_loss_thought": 0.15127262340945435 }, { "epoch": 3.97, "grad_norm": 1.5045007836682485, "learning_rate": 4.5952202622296015e-06, "loss": 0.1354, "sft_loss": 0.013794716726988555, "step": 665, "total_loss": 0.015795043228149553, "value_loss": 0.020003264624756413, "value_loss_search": 0.0463893650089517, "value_loss_thought": 0.11363675205575419 }, { "epoch": 4.0, "grad_norm": 1.1345147085500709, "learning_rate": 4.585664977830142e-06, "loss": 0.1411, "sft_loss": 0.01729184603318572, "step": 670, "total_loss": 0.01970745405710659, "value_loss": 0.024156079764179595, "value_loss_search": 0.04846977311616456, "value_loss_thought": 0.14477886538206802 }, { "epoch": 4.03, "grad_norm": 0.7700098195196915, "learning_rate": 4.576008401529746e-06, "loss": 0.0925, "sft_loss": 0.00861083798808977, "step": 675, "total_loss": 0.009520014304735014, "value_loss": 0.009091763565447764, "value_loss_search": 0.02745309268981373, "value_loss_thought": 0.04528101538708142 }, { "epoch": 4.06, "grad_norm": 1.2543052602388458, "learning_rate": 4.566251002307363e-06, "loss": 0.0957, "sft_loss": 0.009448495891410858, "step": 680, "total_loss": 0.010420851569779187, "value_loss": 0.00972355670351135, "value_loss_search": 0.043272700070008344, "value_loss_thought": 0.03451575428043725 }, { "epoch": 4.09, "grad_norm": 0.7516221051255656, "learning_rate": 4.556393254038486e-06, "loss": 0.0866, "sft_loss": 0.00772422740701586, "step": 685, "total_loss": 0.008484165449101511, "value_loss": 0.007599379828752717, "value_loss_search": 0.028828184264500577, "value_loss_thought": 0.03196685446855554 }, { "epoch": 4.12, "grad_norm": 0.9845886373213341, "learning_rate": 4.546435635472133e-06, "loss": 0.0995, "sft_loss": 0.0058452394208870825, "step": 690, "total_loss": 0.006937743502530225, "value_loss": 0.010925040280527298, "value_loss_search": 0.038594244975945456, "value_loss_thought": 0.04880607676673208 }, { "epoch": 4.15, "grad_norm": 0.9063088407123128, "learning_rate": 4.536378630207598e-06, "loss": 0.0863, "sft_loss": 0.009303204133175313, "step": 695, "total_loss": 0.010333730792467577, "value_loss": 0.01030526671976304, "value_loss_search": 0.04375397186494183, "value_loss_thought": 0.038688162007019855 }, { "epoch": 4.18, "grad_norm": 0.6582700642646613, "learning_rate": 4.526222726670966e-06, "loss": 0.0954, "sft_loss": 0.011040500248782337, "step": 700, "total_loss": 0.01200565958547486, "value_loss": 0.009651594169963574, "value_loss_search": 0.03989513861957903, "value_loss_thought": 0.037317614558742204 }, { "epoch": 4.21, "grad_norm": 0.743159814030366, "learning_rate": 4.515968418091394e-06, "loss": 0.0899, "sft_loss": 0.010487070470117033, "step": 705, "total_loss": 0.011825196649658665, "value_loss": 0.013381260839241804, "value_loss_search": 0.036946902065199086, "value_loss_thought": 0.07010318333050236 }, { "epoch": 4.24, "grad_norm": 0.9990248193841446, "learning_rate": 4.505616202477152e-06, "loss": 0.1002, "sft_loss": 0.009330611897166819, "step": 710, "total_loss": 0.009855728735766433, "value_loss": 0.0052511682242766256, "value_loss_search": 0.02117164042309696, "value_loss_thought": 0.020837705221492798 }, { "epoch": 4.27, "grad_norm": 0.7065067498607398, "learning_rate": 4.49516658259144e-06, "loss": 0.1018, "sft_loss": 0.013342563062906265, "step": 715, "total_loss": 0.014073112735923132, "value_loss": 0.0073054967775533445, "value_loss_search": 0.023805043454274255, "value_loss_thought": 0.03463893002911504 }, { "epoch": 4.3, "grad_norm": 0.6847509331487238, "learning_rate": 4.48462006592797e-06, "loss": 0.0993, "sft_loss": 0.010668217262718827, "step": 720, "total_loss": 0.011838797037410132, "value_loss": 0.011705797309559784, "value_loss_search": 0.047429115066006486, "value_loss_thought": 0.046217263146081676 }, { "epoch": 4.33, "grad_norm": 0.6685576496831016, "learning_rate": 4.473977164686321e-06, "loss": 0.0944, "sft_loss": 0.008604107843711972, "step": 725, "total_loss": 0.009628339805681207, "value_loss": 0.010242320024417495, "value_loss_search": 0.035196531142617005, "value_loss_thought": 0.04674202849655558 }, { "epoch": 4.36, "grad_norm": 0.75658777055452, "learning_rate": 4.46323839574706e-06, "loss": 0.1034, "sft_loss": 0.010033530904911459, "step": 730, "total_loss": 0.011016872639970643, "value_loss": 0.009833417008576362, "value_loss_search": 0.02881108484938295, "value_loss_thought": 0.04985625083095328 }, { "epoch": 4.39, "grad_norm": 0.688131213767825, "learning_rate": 4.45240428064664e-06, "loss": 0.095, "sft_loss": 0.013836181082297117, "step": 735, "total_loss": 0.014805314792602076, "value_loss": 0.009691335982915916, "value_loss_search": 0.036203794245648166, "value_loss_thought": 0.04132689285597735 }, { "epoch": 4.42, "grad_norm": 0.7096504880990531, "learning_rate": 4.4414753455520795e-06, "loss": 0.0982, "sft_loss": 0.009608610440045596, "step": 740, "total_loss": 0.010440762112193625, "value_loss": 0.00832151709196296, "value_loss_search": 0.029671950675117388, "value_loss_thought": 0.036900186177172146 }, { "epoch": 4.45, "grad_norm": 0.6217612986336695, "learning_rate": 4.430452121235396e-06, "loss": 0.0971, "sft_loss": 0.012536874134093523, "step": 745, "total_loss": 0.014267633290864978, "value_loss": 0.017307592170084263, "value_loss_search": 0.034450266263445425, "value_loss_thought": 0.10401047060558995 }, { "epoch": 4.48, "grad_norm": 0.797630098293788, "learning_rate": 4.419335143047834e-06, "loss": 0.1029, "sft_loss": 0.014243904023896903, "step": 750, "total_loss": 0.015265003470494776, "value_loss": 0.010210994727276556, "value_loss_search": 0.032982522548752516, "value_loss_thought": 0.048705436136879145 }, { "epoch": 4.51, "grad_norm": 0.6961134075635874, "learning_rate": 4.408124950893868e-06, "loss": 0.095, "sft_loss": 0.012445987621322274, "step": 755, "total_loss": 0.01341653695722016, "value_loss": 0.009705493019964706, "value_loss_search": 0.03983713596752523, "value_loss_thought": 0.03780680882375691 }, { "epoch": 4.54, "grad_norm": 0.5762572982567685, "learning_rate": 4.396822089204981e-06, "loss": 0.0998, "sft_loss": 0.011862749280408025, "step": 760, "total_loss": 0.013249826465107617, "value_loss": 0.013870771408619476, "value_loss_search": 0.0406148220283626, "value_loss_thought": 0.07035134897487297 }, { "epoch": 4.57, "grad_norm": 0.7824308185518036, "learning_rate": 4.3854271069132195e-06, "loss": 0.1015, "sft_loss": 0.012830629444215447, "step": 765, "total_loss": 0.01348854236812258, "value_loss": 0.006579129521428228, "value_loss_search": 0.02838464450221636, "value_loss_thought": 0.024248391312721652 }, { "epoch": 4.6, "grad_norm": 1.115871263491779, "learning_rate": 4.373940557424537e-06, "loss": 0.0929, "sft_loss": 0.013013543572742491, "step": 770, "total_loss": 0.014466597687578542, "value_loss": 0.014530540375199052, "value_loss_search": 0.044251333865793184, "value_loss_thought": 0.07199298751397691 }, { "epoch": 4.63, "grad_norm": 0.5639360855348526, "learning_rate": 4.36236299859192e-06, "loss": 0.0942, "sft_loss": 0.010940996720455587, "step": 775, "total_loss": 0.01194770065769717, "value_loss": 0.010067038443867204, "value_loss_search": 0.032814532905274515, "value_loss_thought": 0.04772177415952683 }, { "epoch": 4.65, "grad_norm": 1.1373613205181072, "learning_rate": 4.350694992688289e-06, "loss": 0.1003, "sft_loss": 0.009771948284469544, "step": 780, "total_loss": 0.010786435697650632, "value_loss": 0.010144873889385054, "value_loss_search": 0.04210830284959002, "value_loss_thought": 0.03905068875028519 }, { "epoch": 4.68, "grad_norm": 0.7728548735492248, "learning_rate": 4.338937106379199e-06, "loss": 0.0986, "sft_loss": 0.01460006288252771, "step": 785, "total_loss": 0.015538786767115198, "value_loss": 0.009387238657767227, "value_loss_search": 0.03280571626974051, "value_loss_thought": 0.04229219339322299 }, { "epoch": 4.71, "grad_norm": 0.8732356620640505, "learning_rate": 4.32708991069531e-06, "loss": 0.0941, "sft_loss": 0.009909180679824204, "step": 790, "total_loss": 0.01077013838946641, "value_loss": 0.0086095773167699, "value_loss_search": 0.0318576935078454, "value_loss_thought": 0.03701892458407201 }, { "epoch": 4.74, "grad_norm": 1.143053369270145, "learning_rate": 4.315153981004666e-06, "loss": 0.0966, "sft_loss": 0.009806250128895045, "step": 795, "total_loss": 0.011583962598513154, "value_loss": 0.017777125288466776, "value_loss_search": 0.040363578098720154, "value_loss_thought": 0.1018534237449785 }, { "epoch": 4.77, "grad_norm": 0.7780179009340498, "learning_rate": 4.3031298969847406e-06, "loss": 0.1002, "sft_loss": 0.009359034500084818, "step": 800, "total_loss": 0.010968359952437367, "value_loss": 0.016093254049246754, "value_loss_search": 0.05174213330789144, "value_loss_thought": 0.07700389907186037 }, { "epoch": 4.8, "grad_norm": 0.7183429149076678, "learning_rate": 4.29101824259429e-06, "loss": 0.1018, "sft_loss": 0.009059342555701733, "step": 805, "total_loss": 0.010009716682918678, "value_loss": 0.009503741757498574, "value_loss_search": 0.03197408163345017, "value_loss_thought": 0.0440558526033783 }, { "epoch": 4.83, "grad_norm": 0.6507483126573996, "learning_rate": 4.2788196060449925e-06, "loss": 0.1002, "sft_loss": 0.010762089781928807, "step": 810, "total_loss": 0.012873523510279483, "value_loss": 0.021114336799837475, "value_loss_search": 0.04117565880492293, "value_loss_thought": 0.12773903450543003 }, { "epoch": 4.86, "grad_norm": 0.8117830572861622, "learning_rate": 4.266534579772881e-06, "loss": 0.0998, "sft_loss": 0.016996108298189937, "step": 815, "total_loss": 0.01824291221478802, "value_loss": 0.012468039073382897, "value_loss_search": 0.026130347241348773, "value_loss_thought": 0.07361396466958467 }, { "epoch": 4.89, "grad_norm": 0.9626098886691061, "learning_rate": 4.254163760409571e-06, "loss": 0.1041, "sft_loss": 0.01157297370955348, "step": 820, "total_loss": 0.012655413702026408, "value_loss": 0.01082440062873502, "value_loss_search": 0.029132241165916638, "value_loss_thought": 0.057462964772071246 }, { "epoch": 4.92, "grad_norm": 0.6101284730372293, "learning_rate": 4.2417077487532835e-06, "loss": 0.0917, "sft_loss": 0.00924121611751616, "step": 825, "total_loss": 0.010044124071836791, "value_loss": 0.008029079741447731, "value_loss_search": 0.031784425488490343, "value_loss_thought": 0.032448212833514845 }, { "epoch": 4.95, "grad_norm": 0.8024833457256252, "learning_rate": 4.229167149739667e-06, "loss": 0.094, "sft_loss": 0.00918948817998171, "step": 830, "total_loss": 0.010426052451214219, "value_loss": 0.01236564262903812, "value_loss_search": 0.03679989460033539, "value_loss_thought": 0.062125246099412834 }, { "epoch": 4.98, "grad_norm": 1.1622516088977335, "learning_rate": 4.216542572412423e-06, "loss": 0.0952, "sft_loss": 0.007891789707355202, "step": 835, "total_loss": 0.00946709308821454, "value_loss": 0.015753033644841707, "value_loss_search": 0.03111598624600447, "value_loss_thought": 0.0949082830469706 }, { "epoch": 5.01, "grad_norm": 0.6908442768231199, "learning_rate": 4.203834629893719e-06, "loss": 0.0811, "sft_loss": 0.007326198380906135, "step": 840, "total_loss": 0.008643819743883795, "value_loss": 0.013176213806036684, "value_loss_search": 0.05124346678378515, "value_loss_thought": 0.0541662441482913 }, { "epoch": 5.04, "grad_norm": 0.7901490430318995, "learning_rate": 4.19104393935442e-06, "loss": 0.0662, "sft_loss": 0.007722388836555183, "step": 845, "total_loss": 0.00843456873717514, "value_loss": 0.0071217986454485075, "value_loss_search": 0.028109038909803985, "value_loss_thought": 0.028865350570777083 }, { "epoch": 5.07, "grad_norm": 1.0101019251398367, "learning_rate": 4.178171121984109e-06, "loss": 0.0699, "sft_loss": 0.008467405906412751, "step": 850, "total_loss": 0.008816118605045631, "value_loss": 0.003487127016751401, "value_loss_search": 0.013744208114383127, "value_loss_thought": 0.014152807989376015 }, { "epoch": 5.1, "grad_norm": 0.653401555518481, "learning_rate": 4.16521680296092e-06, "loss": 0.067, "sft_loss": 0.010541580687277018, "step": 855, "total_loss": 0.011391171138998856, "value_loss": 0.008495904014353073, "value_loss_search": 0.02280127693209124, "value_loss_thought": 0.045165955742459116 }, { "epoch": 5.13, "grad_norm": 0.8573274418097468, "learning_rate": 4.152181611421179e-06, "loss": 0.0607, "sft_loss": 0.00878625299083069, "step": 860, "total_loss": 0.009619847631120138, "value_loss": 0.00833594629189065, "value_loss_search": 0.02539439773918275, "value_loss_thought": 0.04129317272327171 }, { "epoch": 5.16, "grad_norm": 0.6170985982380647, "learning_rate": 4.139066180428846e-06, "loss": 0.0655, "sft_loss": 0.009596668492304162, "step": 865, "total_loss": 0.010120830915363399, "value_loss": 0.005241623797928696, "value_loss_search": 0.021597909021647866, "value_loss_thought": 0.020335081457960768 }, { "epoch": 5.19, "grad_norm": 0.825156846628994, "learning_rate": 4.125871146944771e-06, "loss": 0.0695, "sft_loss": 0.010449141904246062, "step": 870, "total_loss": 0.011130747148763475, "value_loss": 0.006816053235161235, "value_loss_search": 0.02648081009813268, "value_loss_thought": 0.028047615623654563 }, { "epoch": 5.22, "grad_norm": 0.5819547422187984, "learning_rate": 4.112597151795758e-06, "loss": 0.063, "sft_loss": 0.00804176195524633, "step": 875, "total_loss": 0.009046485570181062, "value_loss": 0.010047235474598893, "value_loss_search": 0.022064273892272012, "value_loss_thought": 0.05831360963654788 }, { "epoch": 5.25, "grad_norm": 0.8024901403445527, "learning_rate": 4.099244839643448e-06, "loss": 0.0589, "sft_loss": 0.007425288169179112, "step": 880, "total_loss": 0.007866452395560409, "value_loss": 0.004411642082025935, "value_loss_search": 0.01583855556901881, "value_loss_thought": 0.019454581364698242 }, { "epoch": 5.28, "grad_norm": 0.6299816489940798, "learning_rate": 4.085814858953001e-06, "loss": 0.0707, "sft_loss": 0.010274743323680013, "step": 885, "total_loss": 0.011220432494735633, "value_loss": 0.009456892390926442, "value_loss_search": 0.017595245018355854, "value_loss_thought": 0.05805989508698985 }, { "epoch": 5.31, "grad_norm": 0.5775443078383485, "learning_rate": 4.072307861961614e-06, "loss": 0.0668, "sft_loss": 0.007527518505230546, "step": 890, "total_loss": 0.008477710493883706, "value_loss": 0.00950191973965957, "value_loss_search": 0.03369882960868278, "value_loss_thought": 0.042316528412220576 }, { "epoch": 5.34, "grad_norm": 0.7217258713196177, "learning_rate": 4.058724504646834e-06, "loss": 0.0695, "sft_loss": 0.008143483952153474, "step": 895, "total_loss": 0.009240032660204633, "value_loss": 0.01096548721779982, "value_loss_search": 0.03710676123126859, "value_loss_thought": 0.050617136721757564 }, { "epoch": 5.37, "grad_norm": 0.7509981121776955, "learning_rate": 4.045065446694709e-06, "loss": 0.0623, "sft_loss": 0.009156511997571216, "step": 900, "total_loss": 0.009822533285591817, "value_loss": 0.006660212647693698, "value_loss_search": 0.02795750253162623, "value_loss_thought": 0.02532419814169771 }, { "epoch": 5.4, "grad_norm": 0.5391534216473769, "learning_rate": 4.031331351467744e-06, "loss": 0.0693, "sft_loss": 0.006840780581114814, "step": 905, "total_loss": 0.007346747693952693, "value_loss": 0.005059671220953988, "value_loss_search": 0.023152905100801036, "value_loss_thought": 0.017324464485159296 }, { "epoch": 5.43, "grad_norm": 0.8478085328901763, "learning_rate": 4.017522885972687e-06, "loss": 0.066, "sft_loss": 0.00748998821945861, "step": 910, "total_loss": 0.008190430112915693, "value_loss": 0.007004418966835147, "value_loss_search": 0.0241462381236488, "value_loss_thought": 0.03188911370725691 }, { "epoch": 5.46, "grad_norm": 0.6273190350109188, "learning_rate": 4.0036407208281335e-06, "loss": 0.0642, "sft_loss": 0.007753341854549944, "step": 915, "total_loss": 0.008310681724867663, "value_loss": 0.0055733984515427435, "value_loss_search": 0.017653270972687096, "value_loss_thought": 0.026933916354755637 }, { "epoch": 5.49, "grad_norm": 0.6275397576143824, "learning_rate": 3.989685530231958e-06, "loss": 0.0723, "sft_loss": 0.008013604581356049, "step": 920, "total_loss": 0.008786806087823607, "value_loss": 0.007732015183682961, "value_loss_search": 0.018130889449082588, "value_loss_thought": 0.04372523128440662 }, { "epoch": 5.52, "grad_norm": 0.6557274591871389, "learning_rate": 3.975657991928573e-06, "loss": 0.0654, "sft_loss": 0.007132729375734925, "step": 925, "total_loss": 0.0075831895907356285, "value_loss": 0.004504602500765032, "value_loss_search": 0.017856063829344748, "value_loss_thought": 0.018180756335345903 }, { "epoch": 5.55, "grad_norm": 0.6502931359394508, "learning_rate": 3.961558787176012e-06, "loss": 0.0702, "sft_loss": 0.0077712137601338325, "step": 930, "total_loss": 0.008426044349789663, "value_loss": 0.006548305749731753, "value_loss_search": 0.020701198827305235, "value_loss_thought": 0.03168524749562494 }, { "epoch": 5.58, "grad_norm": 0.5593737214026034, "learning_rate": 3.9473886007128424e-06, "loss": 0.0674, "sft_loss": 0.0070376997464336455, "step": 935, "total_loss": 0.007807116948492876, "value_loss": 0.007694172377568975, "value_loss_search": 0.02574841559246579, "value_loss_thought": 0.03580496397080424 }, { "epoch": 5.61, "grad_norm": 0.5565142984447847, "learning_rate": 3.933148120724913e-06, "loss": 0.0695, "sft_loss": 0.005548381910193712, "step": 940, "total_loss": 0.006512936933782498, "value_loss": 0.009645550738008523, "value_loss_search": 0.02102770657133988, "value_loss_thought": 0.056136698765476466 }, { "epoch": 5.64, "grad_norm": 0.6653147056652764, "learning_rate": 3.9188380388119325e-06, "loss": 0.075, "sft_loss": 0.009176643792307005, "step": 945, "total_loss": 0.010155323132971716, "value_loss": 0.009786792815066291, "value_loss_search": 0.02649424351284324, "value_loss_thought": 0.05180009940095261 }, { "epoch": 5.67, "grad_norm": 0.4898734764580984, "learning_rate": 3.904459049953877e-06, "loss": 0.0661, "sft_loss": 0.008050526608712971, "step": 950, "total_loss": 0.008603187052369777, "value_loss": 0.005526604052067796, "value_loss_search": 0.02066867913152919, "value_loss_thought": 0.0235441530123353 }, { "epoch": 5.7, "grad_norm": 0.7610416619124152, "learning_rate": 3.890011852477243e-06, "loss": 0.0712, "sft_loss": 0.00837269393960014, "step": 955, "total_loss": 0.009244194875520861, "value_loss": 0.00871500901721447, "value_loss_search": 0.026370803010149758, "value_loss_thought": 0.04334926914202271 }, { "epoch": 5.73, "grad_norm": 0.7172771316565127, "learning_rate": 3.875497148021129e-06, "loss": 0.0726, "sft_loss": 0.008401849202346056, "step": 960, "total_loss": 0.008891117146237092, "value_loss": 0.004892679379145193, "value_loss_search": 0.01661159728510029, "value_loss_thought": 0.022529837636739103 }, { "epoch": 5.76, "grad_norm": 0.6480324222376628, "learning_rate": 3.860915641503161e-06, "loss": 0.0646, "sft_loss": 0.00682629911461845, "step": 965, "total_loss": 0.007262193315084176, "value_loss": 0.004358942487397144, "value_loss_search": 0.01821862360557134, "value_loss_thought": 0.016652916284147067 }, { "epoch": 5.79, "grad_norm": 0.6949624103946892, "learning_rate": 3.84626804108526e-06, "loss": 0.0719, "sft_loss": 0.008461356349289417, "step": 970, "total_loss": 0.009027346382336533, "value_loss": 0.005659900530326922, "value_loss_search": 0.019068497527393903, "value_loss_thought": 0.02621070666225478 }, { "epoch": 5.82, "grad_norm": 0.7622591624750499, "learning_rate": 3.831555058139244e-06, "loss": 0.0707, "sft_loss": 0.007402687979629263, "step": 975, "total_loss": 0.00789324305359287, "value_loss": 0.004905550461671737, "value_loss_search": 0.013050667314735165, "value_loss_thought": 0.02619373640912954 }, { "epoch": 5.85, "grad_norm": 0.6838250232406838, "learning_rate": 3.8167774072122854e-06, "loss": 0.0673, "sft_loss": 0.008353895461186766, "step": 980, "total_loss": 0.00888050429666123, "value_loss": 0.005266088167263661, "value_loss_search": 0.01670047964719288, "value_loss_thought": 0.025428225997166008 }, { "epoch": 5.88, "grad_norm": 0.9218213608030134, "learning_rate": 3.8019358059922052e-06, "loss": 0.0708, "sft_loss": 0.006913194921799004, "step": 985, "total_loss": 0.007807648131026213, "value_loss": 0.008944532042056608, "value_loss_search": 0.02053129872363115, "value_loss_thought": 0.05102495740206905 }, { "epoch": 5.91, "grad_norm": 0.7564910242846892, "learning_rate": 3.7870309752726185e-06, "loss": 0.064, "sft_loss": 0.0055543248075991866, "step": 990, "total_loss": 0.00604638959850945, "value_loss": 0.004920647482958884, "value_loss_search": 0.01775021549135545, "value_loss_thought": 0.021614964383616098 }, { "epoch": 5.94, "grad_norm": 0.7406424491645267, "learning_rate": 3.772063638917931e-06, "loss": 0.062, "sft_loss": 0.007381244131829589, "step": 995, "total_loss": 0.00787693980830113, "value_loss": 0.004956956215028186, "value_loss_search": 0.0186556116294355, "value_loss_thought": 0.02100003812029172 }, { "epoch": 5.97, "grad_norm": 0.8597100129196285, "learning_rate": 3.75703452382818e-06, "loss": 0.0686, "sft_loss": 0.0067865438759326935, "step": 1000, "total_loss": 0.0074210830740980786, "value_loss": 0.0063453914136630376, "value_loss_search": 0.020665711158505927, "value_loss_thought": 0.030097420751735625 }, { "epoch": 6.0, "grad_norm": 0.6387120370362654, "learning_rate": 3.741944359903734e-06, "loss": 0.0685, "sft_loss": 0.007980260415934026, "step": 1005, "total_loss": 0.00841312516255357, "value_loss": 0.00432864762956342, "value_loss_search": 0.016856307840919273, "value_loss_thought": 0.017772873218109452 }, { "epoch": 6.03, "grad_norm": 0.47299058750162554, "learning_rate": 3.7267938800098454e-06, "loss": 0.0455, "sft_loss": 0.005139627197058872, "step": 1010, "total_loss": 0.005742018675823602, "value_loss": 0.006023914470279124, "value_loss_search": 0.014465152566535267, "value_loss_thought": 0.03372616275910332 }, { "epoch": 6.06, "grad_norm": 0.5769250479404755, "learning_rate": 3.7115838199410566e-06, "loss": 0.0442, "sft_loss": 0.004237775265937671, "step": 1015, "total_loss": 0.004536310447571168, "value_loss": 0.002985352237874395, "value_loss_search": 0.012497988520624403, "value_loss_thought": 0.01138482937376466 }, { "epoch": 6.09, "grad_norm": 0.5775093614029521, "learning_rate": 3.696314918385466e-06, "loss": 0.0455, "sft_loss": 0.004376189230242744, "step": 1020, "total_loss": 0.004725277596082833, "value_loss": 0.0034908839002582683, "value_loss_search": 0.012387420580819253, "value_loss_thought": 0.015539650371397328 }, { "epoch": 6.12, "grad_norm": 0.5743014343738564, "learning_rate": 3.680987916888855e-06, "loss": 0.0464, "sft_loss": 0.004472998692654074, "step": 1025, "total_loss": 0.004852331803667198, "value_loss": 0.0037933312117729655, "value_loss_search": 0.009115806162424179, "value_loss_thought": 0.02123084324521187 }, { "epoch": 6.15, "grad_norm": 1.0135045723056435, "learning_rate": 3.6656035598186717e-06, "loss": 0.0483, "sft_loss": 0.006532586639514193, "step": 1030, "total_loss": 0.007033583752149753, "value_loss": 0.005009971209847208, "value_loss_search": 0.01218861587973379, "value_loss_thought": 0.02789115408404541 }, { "epoch": 6.18, "grad_norm": 0.8265173915326937, "learning_rate": 3.650162594327881e-06, "loss": 0.0464, "sft_loss": 0.005671659158542753, "step": 1035, "total_loss": 0.006245543843522228, "value_loss": 0.005738847342217923, "value_loss_search": 0.015518051942945022, "value_loss_thought": 0.030392726854552167 }, { "epoch": 6.21, "grad_norm": 0.47962371612527804, "learning_rate": 3.634665770318678e-06, "loss": 0.0413, "sft_loss": 0.005972391797695309, "step": 1040, "total_loss": 0.006403001316243717, "value_loss": 0.004306095417541655, "value_loss_search": 0.016831279399025335, "value_loss_thought": 0.017617484057427645 }, { "epoch": 6.24, "grad_norm": 1.0086948671095952, "learning_rate": 3.619113840406071e-06, "loss": 0.0491, "sft_loss": 0.004732140112901106, "step": 1045, "total_loss": 0.005075448810612215, "value_loss": 0.003433087062830964, "value_loss_search": 0.009895986284152513, "value_loss_thought": 0.01756871009142742 }, { "epoch": 6.27, "grad_norm": 0.6224166386357451, "learning_rate": 3.6035075598813275e-06, "loss": 0.0508, "sft_loss": 0.005647319235140458, "step": 1050, "total_loss": 0.005981965384165733, "value_loss": 0.0033464612621173727, "value_loss_search": 0.014234105288312548, "value_loss_thought": 0.012537584816254822 }, { "epoch": 6.3, "grad_norm": 0.6803707206328181, "learning_rate": 3.587847686675293e-06, "loss": 0.0444, "sft_loss": 0.004154384031426162, "step": 1055, "total_loss": 0.0045537911174491795, "value_loss": 0.0039940711957683565, "value_loss_search": 0.012944002056576665, "value_loss_thought": 0.019008567334594773 }, { "epoch": 6.33, "grad_norm": 0.6488360833237377, "learning_rate": 3.572134981321582e-06, "loss": 0.0464, "sft_loss": 0.00495091185439378, "step": 1060, "total_loss": 0.005601490682847654, "value_loss": 0.006505788123034506, "value_loss_search": 0.00773236445022576, "value_loss_thought": 0.04431394169132545 }, { "epoch": 6.36, "grad_norm": 0.470284848811417, "learning_rate": 3.556370206919643e-06, "loss": 0.0412, "sft_loss": 0.006972516793757677, "step": 1065, "total_loss": 0.007320523636008147, "value_loss": 0.0034800680246689806, "value_loss_search": 0.015306914580980902, "value_loss_thought": 0.012533629250856392 }, { "epoch": 6.39, "grad_norm": 0.696033260327911, "learning_rate": 3.5405541290976968e-06, "loss": 0.0411, "sft_loss": 0.004306224733591079, "step": 1070, "total_loss": 0.004650098075256892, "value_loss": 0.003438732988161064, "value_loss_search": 0.013183290196491271, "value_loss_thought": 0.014326573800536835 }, { "epoch": 6.42, "grad_norm": 0.8518349953519152, "learning_rate": 3.5246875159755554e-06, "loss": 0.0393, "sft_loss": 0.004596246278379112, "step": 1075, "total_loss": 0.005092797088298085, "value_loss": 0.004965507755696308, "value_loss_search": 0.013638158090543584, "value_loss_thought": 0.026085903684634103 }, { "epoch": 6.45, "grad_norm": 0.7830604366139895, "learning_rate": 3.5087711381273144e-06, "loss": 0.0483, "sft_loss": 0.004399802925763652, "step": 1080, "total_loss": 0.00474689214189965, "value_loss": 0.0034708920188450064, "value_loss_search": 0.014559524009473534, "value_loss_thought": 0.013207611872235247 }, { "epoch": 6.48, "grad_norm": 1.0245098727939808, "learning_rate": 3.49280576854393e-06, "loss": 0.0453, "sft_loss": 0.006848539051134139, "step": 1085, "total_loss": 0.007486119516067901, "value_loss": 0.0063758047096257545, "value_loss_search": 0.01603142107730946, "value_loss_thought": 0.034975016615135246 }, { "epoch": 6.51, "grad_norm": 0.6521674554950044, "learning_rate": 3.4767921825956824e-06, "loss": 0.0487, "sft_loss": 0.004285465716384352, "step": 1090, "total_loss": 0.004569660496787265, "value_loss": 0.0028419478604973848, "value_loss_search": 0.008792001151141449, "value_loss_thought": 0.013943581694456952 }, { "epoch": 6.53, "grad_norm": 0.7150319458252575, "learning_rate": 3.4607311579945124e-06, "loss": 0.0506, "sft_loss": 0.006909280724357814, "step": 1095, "total_loss": 0.007147605080501762, "value_loss": 0.0023832433014376875, "value_loss_search": 0.010291470044728612, "value_loss_thought": 0.008774476365078953 }, { "epoch": 6.56, "grad_norm": 0.9363000353600848, "learning_rate": 3.444623474756258e-06, "loss": 0.0527, "sft_loss": 0.003712919045938179, "step": 1100, "total_loss": 0.004117744177119675, "value_loss": 0.004048251279891701, "value_loss_search": 0.012587877828525506, "value_loss_thought": 0.01979813240959629 }, { "epoch": 6.59, "grad_norm": 0.5759000271014428, "learning_rate": 3.4284699151627672e-06, "loss": 0.0463, "sft_loss": 0.005106915923533961, "step": 1105, "total_loss": 0.0054412948647041045, "value_loss": 0.0033437895152928832, "value_loss_search": 0.01137218730814311, "value_loss_thought": 0.015378128899897092 }, { "epoch": 6.62, "grad_norm": 0.7357711398285077, "learning_rate": 3.412271263723909e-06, "loss": 0.0434, "sft_loss": 0.005000182124786079, "step": 1110, "total_loss": 0.005477192047459311, "value_loss": 0.004770098890833197, "value_loss_search": 0.012354543731896683, "value_loss_thought": 0.025806247426635308 }, { "epoch": 6.65, "grad_norm": 0.8938685745921593, "learning_rate": 3.3960283071394717e-06, "loss": 0.0469, "sft_loss": 0.0063235011184588075, "step": 1115, "total_loss": 0.006910537980154174, "value_loss": 0.005870367820580214, "value_loss_search": 0.014325453480932993, "value_loss_thought": 0.032637489220542194 }, { "epoch": 6.68, "grad_norm": 0.6909317413670801, "learning_rate": 3.3797418342609577e-06, "loss": 0.047, "sft_loss": 0.004918072844156995, "step": 1120, "total_loss": 0.0052730022313710375, "value_loss": 0.0035492941345751207, "value_loss_search": 0.010046540721191377, "value_loss_thought": 0.018347811973399075 }, { "epoch": 6.71, "grad_norm": 0.5732090332805206, "learning_rate": 3.3634126360532694e-06, "loss": 0.0468, "sft_loss": 0.005004867579555139, "step": 1125, "total_loss": 0.005580213024023806, "value_loss": 0.005753454113050793, "value_loss_search": 0.0135478620575046, "value_loss_thought": 0.03247977066948806 }, { "epoch": 6.74, "grad_norm": 0.5934033812484811, "learning_rate": 3.347041505556298e-06, "loss": 0.0463, "sft_loss": 0.005900320567889139, "step": 1130, "total_loss": 0.006214558424846928, "value_loss": 0.0031423781176272312, "value_loss_search": 0.012277730915150187, "value_loss_thought": 0.01286129405855263 }, { "epoch": 6.77, "grad_norm": 0.6696886862461858, "learning_rate": 3.3306292378464083e-06, "loss": 0.0508, "sft_loss": 0.007460485817864538, "step": 1135, "total_loss": 0.007751618086540191, "value_loss": 0.0029113226871686493, "value_loss_search": 0.01191347677657859, "value_loss_thought": 0.011377104656867231 }, { "epoch": 6.8, "grad_norm": 0.7790037685619419, "learning_rate": 3.314176629997825e-06, "loss": 0.0452, "sft_loss": 0.004827470483724028, "step": 1140, "total_loss": 0.005163270902585282, "value_loss": 0.003358004500989864, "value_loss_search": 0.013757221815399134, "value_loss_thought": 0.013106814269212919 }, { "epoch": 6.83, "grad_norm": 0.5505345044218094, "learning_rate": 3.297684481043922e-06, "loss": 0.0469, "sft_loss": 0.0062236432102508845, "step": 1145, "total_loss": 0.006526504096120789, "value_loss": 0.003028608957868073, "value_loss_search": 0.012845393837199025, "value_loss_thought": 0.011383477907293127 }, { "epoch": 6.86, "grad_norm": 0.46343408162242566, "learning_rate": 3.281153591938418e-06, "loss": 0.0433, "sft_loss": 0.005084162973798811, "step": 1150, "total_loss": 0.005412001899304642, "value_loss": 0.003278389718082053, "value_loss_search": 0.012043231101108632, "value_loss_thought": 0.01418388647671236 }, { "epoch": 6.89, "grad_norm": 0.5728946583207882, "learning_rate": 3.264584765516474e-06, "loss": 0.0513, "sft_loss": 0.008054213871946558, "step": 1155, "total_loss": 0.008398819074250241, "value_loss": 0.0034460521280379906, "value_loss_search": 0.013998744965681454, "value_loss_thought": 0.01356967230240116 }, { "epoch": 6.92, "grad_norm": 0.5867841886877614, "learning_rate": 3.2479788064557084e-06, "loss": 0.0424, "sft_loss": 0.004617373802466318, "step": 1160, "total_loss": 0.004921634896254546, "value_loss": 0.003042611040564225, "value_loss_search": 0.013083630732762686, "value_loss_thought": 0.01125725753390725 }, { "epoch": 6.95, "grad_norm": 0.5749765325052467, "learning_rate": 3.231336521237113e-06, "loss": 0.0425, "sft_loss": 0.005324905528686941, "step": 1165, "total_loss": 0.0056936069204539305, "value_loss": 0.003687014164006541, "value_loss_search": 0.01334903096231983, "value_loss_thought": 0.016147082374664022 }, { "epoch": 6.98, "grad_norm": 0.5228964319711119, "learning_rate": 3.2146587181058858e-06, "loss": 0.049, "sft_loss": 0.003777366707799956, "step": 1170, "total_loss": 0.004077984397645196, "value_loss": 0.0030061770619795427, "value_loss_search": 0.011763368438232646, "value_loss_thought": 0.012286048040141394 }, { "epoch": 7.01, "grad_norm": 0.40592168178993326, "learning_rate": 3.1979462070321817e-06, "loss": 0.0374, "sft_loss": 0.005217826526495628, "step": 1175, "total_loss": 0.005588610990344023, "value_loss": 0.003707844631036039, "value_loss_search": 0.01431789886352135, "value_loss_thought": 0.015344858078242396 }, { "epoch": 7.04, "grad_norm": 0.5057051615052391, "learning_rate": 3.1811997996717716e-06, "loss": 0.0303, "sft_loss": 0.0033945336355827747, "step": 1180, "total_loss": 0.0038138232659576943, "value_loss": 0.004192896059157647, "value_loss_search": 0.006568620411331949, "value_loss_thought": 0.02697454801736967 }, { "epoch": 7.07, "grad_norm": 0.751983057399965, "learning_rate": 3.1644203093266257e-06, "loss": 0.0311, "sft_loss": 0.0022496749937999994, "step": 1185, "total_loss": 0.0026717950843249128, "value_loss": 0.004221200832080285, "value_loss_search": 0.008355579411721692, "value_loss_thought": 0.025414026997623296 }, { "epoch": 7.1, "grad_norm": 0.48042996735947746, "learning_rate": 3.147608550905415e-06, "loss": 0.0303, "sft_loss": 0.0035940095724072306, "step": 1190, "total_loss": 0.0038380636684223644, "value_loss": 0.002440540775091904, "value_loss_search": 0.008547667160826222, "value_loss_thought": 0.01097665907091141 }, { "epoch": 7.13, "grad_norm": 0.38813805754778213, "learning_rate": 3.1307653408839316e-06, "loss": 0.0314, "sft_loss": 0.0033699018502375113, "step": 1195, "total_loss": 0.003613495991919535, "value_loss": 0.002435941319390622, "value_loss_search": 0.009487671555439193, "value_loss_thought": 0.009999858911305637 }, { "epoch": 7.16, "grad_norm": 0.500612598170884, "learning_rate": 3.1138914972654423e-06, "loss": 0.0328, "sft_loss": 0.0036761065653990953, "step": 1200, "total_loss": 0.00389666182005044, "value_loss": 0.0022055522495065816, "value_loss_search": 0.010129135770739595, "value_loss_thought": 0.0075152823273128885 }, { "epoch": 7.19, "grad_norm": 0.6490281681098725, "learning_rate": 3.0969878395409536e-06, "loss": 0.0253, "sft_loss": 0.003135368030052632, "step": 1205, "total_loss": 0.0033622780613200122, "value_loss": 0.002269100064461327, "value_loss_search": 0.009437274075094138, "value_loss_thought": 0.00871552659527879 }, { "epoch": 7.22, "grad_norm": 0.4935810834710449, "learning_rate": 3.08005518864942e-06, "loss": 0.0306, "sft_loss": 0.0035176657198462634, "step": 1210, "total_loss": 0.0037368611569259967, "value_loss": 0.0021919542997125064, "value_loss_search": 0.008411810171332945, "value_loss_thought": 0.009123824179141593 }, { "epoch": 7.25, "grad_norm": 0.5321452984531243, "learning_rate": 3.06309436693787e-06, "loss": 0.0298, "sft_loss": 0.0036181122821290048, "step": 1215, "total_loss": 0.003920620708220213, "value_loss": 0.0030250843786689074, "value_loss_search": 0.012618535867022728, "value_loss_thought": 0.011582138900485007 }, { "epoch": 7.28, "grad_norm": 0.5041384981332757, "learning_rate": 3.0461061981214685e-06, "loss": 0.029, "sft_loss": 0.0037700470944400876, "step": 1220, "total_loss": 0.003952616032614742, "value_loss": 0.001825689259169394, "value_loss_search": 0.007131004981476963, "value_loss_thought": 0.0074745089516000006 }, { "epoch": 7.31, "grad_norm": 0.825130289977903, "learning_rate": 3.029091507243514e-06, "loss": 0.0337, "sft_loss": 0.00417679272359237, "step": 1225, "total_loss": 0.004465274805897934, "value_loss": 0.0028848204653513674, "value_loss_search": 0.012175960628178472, "value_loss_thought": 0.010902603188787907 }, { "epoch": 7.34, "grad_norm": 0.48612582380031194, "learning_rate": 3.0120511206353692e-06, "loss": 0.0306, "sft_loss": 0.0037190442701103164, "step": 1230, "total_loss": 0.003983076896520288, "value_loss": 0.0026403263425891057, "value_loss_search": 0.009736456504344914, "value_loss_thought": 0.011386154282945427 }, { "epoch": 7.37, "grad_norm": 0.49557711764797474, "learning_rate": 2.9949858658763297e-06, "loss": 0.0293, "sft_loss": 0.0033108420844655483, "step": 1235, "total_loss": 0.003569280348868631, "value_loss": 0.002584382791928874, "value_loss_search": 0.009431568763068299, "value_loss_thought": 0.011243493664233028 }, { "epoch": 7.4, "grad_norm": 0.5415430915788131, "learning_rate": 2.9778965717534314e-06, "loss": 0.0323, "sft_loss": 0.0030432559084147214, "step": 1240, "total_loss": 0.003313265562081824, "value_loss": 0.002700096501041571, "value_loss_search": 0.006881270110670812, "value_loss_thought": 0.014719502057914724 }, { "epoch": 7.43, "grad_norm": 0.708236640225555, "learning_rate": 2.9607840682211987e-06, "loss": 0.0315, "sft_loss": 0.003214700281387195, "step": 1245, "total_loss": 0.003709355751084331, "value_loss": 0.004946554816625337, "value_loss_search": 0.006554803127994546, "value_loss_thought": 0.03301763468744454 }, { "epoch": 7.46, "grad_norm": 0.5437059054594167, "learning_rate": 2.9436491863613404e-06, "loss": 0.0316, "sft_loss": 0.0036181325966026636, "step": 1250, "total_loss": 0.003912387714774468, "value_loss": 0.0029425512826605884, "value_loss_search": 0.01317626194404511, "value_loss_thought": 0.01036414824043277 }, { "epoch": 7.49, "grad_norm": 0.6432414654067561, "learning_rate": 2.9264927583423847e-06, "loss": 0.0306, "sft_loss": 0.003098224982386455, "step": 1255, "total_loss": 0.003732235044594745, "value_loss": 0.006340100448505836, "value_loss_search": 0.010029617098223299, "value_loss_thought": 0.040691186868843945 }, { "epoch": 7.52, "grad_norm": 0.7784154218893079, "learning_rate": 2.9093156173792675e-06, "loss": 0.0329, "sft_loss": 0.003491103381384164, "step": 1260, "total_loss": 0.003718133881397989, "value_loss": 0.002270305072852352, "value_loss_search": 0.010139237881389818, "value_loss_thought": 0.008023202697256693 }, { "epoch": 7.55, "grad_norm": 0.5051189878209226, "learning_rate": 2.8921185976928613e-06, "loss": 0.0299, "sft_loss": 0.0037886684003751725, "step": 1265, "total_loss": 0.00400653738573169, "value_loss": 0.0021786899614198775, "value_loss_search": 0.00914538588888263, "value_loss_thought": 0.008284133870392906 }, { "epoch": 7.58, "grad_norm": 0.6290655324172468, "learning_rate": 2.8749025344694653e-06, "loss": 0.0336, "sft_loss": 0.004462181986309588, "step": 1270, "total_loss": 0.004616037011123808, "value_loss": 0.00153855009958761, "value_loss_search": 0.005969772761523018, "value_loss_thought": 0.006338628097728361 }, { "epoch": 7.61, "grad_norm": 0.5333504288117544, "learning_rate": 2.857668263820244e-06, "loss": 0.0303, "sft_loss": 0.003401619120268151, "step": 1275, "total_loss": 0.003593827542800909, "value_loss": 0.0019220843075743233, "value_loss_search": 0.008753876088519519, "value_loss_thought": 0.006622798505304672 }, { "epoch": 7.64, "grad_norm": 0.4126781957882197, "learning_rate": 2.840416622740617e-06, "loss": 0.0295, "sft_loss": 0.004203358304221183, "step": 1280, "total_loss": 0.0044454284535731855, "value_loss": 0.0024207017429930034, "value_loss_search": 0.00895141239620898, "value_loss_thought": 0.010414201496178065 }, { "epoch": 7.67, "grad_norm": 0.636868430333384, "learning_rate": 2.823148449069613e-06, "loss": 0.0317, "sft_loss": 0.0037902468640822915, "step": 1285, "total_loss": 0.004080002412759143, "value_loss": 0.002897555428683063, "value_loss_search": 0.010850950920882951, "value_loss_thought": 0.012329492640174067 }, { "epoch": 7.7, "grad_norm": 0.49524669578109337, "learning_rate": 2.8058645814491784e-06, "loss": 0.0312, "sft_loss": 0.004466524376766756, "step": 1290, "total_loss": 0.00469799009814551, "value_loss": 0.002314657226224881, "value_loss_search": 0.006950648547240234, "value_loss_thought": 0.011566609143937968 }, { "epoch": 7.73, "grad_norm": 0.4873869632029965, "learning_rate": 2.7885658592834488e-06, "loss": 0.032, "sft_loss": 0.004330064181704074, "step": 1295, "total_loss": 0.0045417543143230436, "value_loss": 0.0021169017202510077, "value_loss_search": 0.00826698833628825, "value_loss_thought": 0.00866822535913343 }, { "epoch": 7.76, "grad_norm": 0.5038272789213597, "learning_rate": 2.771253122697981e-06, "loss": 0.0331, "sft_loss": 0.0041820299404207615, "step": 1300, "total_loss": 0.004449167268626297, "value_loss": 0.002671373126996457, "value_loss_search": 0.008917606517729836, "value_loss_thought": 0.012453378505870204 }, { "epoch": 7.79, "grad_norm": 0.5716128545581782, "learning_rate": 2.7539272124989545e-06, "loss": 0.0327, "sft_loss": 0.0034668007108848544, "step": 1305, "total_loss": 0.0037846647005551405, "value_loss": 0.0031786399593784153, "value_loss_search": 0.012202327424117244, "value_loss_thought": 0.013226792128807573 }, { "epoch": 7.82, "grad_norm": 0.7312620161870373, "learning_rate": 2.736588970132333e-06, "loss": 0.032, "sft_loss": 0.0036935678450390696, "step": 1310, "total_loss": 0.003966284790448071, "value_loss": 0.0027271693567854525, "value_loss_search": 0.011858981241562105, "value_loss_thought": 0.00995837363161627 }, { "epoch": 7.85, "grad_norm": 0.48258120287180617, "learning_rate": 2.7192392376430014e-06, "loss": 0.0313, "sft_loss": 0.003437778353691101, "step": 1315, "total_loss": 0.0036498856757901875, "value_loss": 0.0021210731328892506, "value_loss_search": 0.008885550579176994, "value_loss_thought": 0.008083034464357297 }, { "epoch": 7.88, "grad_norm": 0.6553771008550281, "learning_rate": 2.701878857633874e-06, "loss": 0.0328, "sft_loss": 0.002638998458860442, "step": 1320, "total_loss": 0.002854476947356943, "value_loss": 0.0021547851526747762, "value_loss_search": 0.006954838147055398, "value_loss_thought": 0.010283443070898101 }, { "epoch": 7.91, "grad_norm": 0.5831125961756038, "learning_rate": 2.684508673224967e-06, "loss": 0.0348, "sft_loss": 0.004423308192053809, "step": 1325, "total_loss": 0.004638882860058402, "value_loss": 0.002155746738399955, "value_loss_search": 0.009081033444783771, "value_loss_thought": 0.008164940534516062 }, { "epoch": 7.94, "grad_norm": 0.6554790409450288, "learning_rate": 2.6671295280124567e-06, "loss": 0.0322, "sft_loss": 0.003197679913137108, "step": 1330, "total_loss": 0.0033951037816166265, "value_loss": 0.001974238623790825, "value_loss_search": 0.007716037955879074, "value_loss_thought": 0.008077871069599497 }, { "epoch": 7.97, "grad_norm": 0.6464974512156216, "learning_rate": 2.649742266027705e-06, "loss": 0.0309, "sft_loss": 0.0025466441409662368, "step": 1335, "total_loss": 0.002726721732233273, "value_loss": 0.001800775762740159, "value_loss_search": 0.006972815421841005, "value_loss_thought": 0.007433390758114911 }, { "epoch": 8.0, "grad_norm": 0.6803735749099431, "learning_rate": 2.632347731696274e-06, "loss": 0.033, "sft_loss": 0.003279316209955141, "step": 1340, "total_loss": 0.0035762524097776804, "value_loss": 0.002969362228782302, "value_loss_search": 0.008045078085967817, "value_loss_thought": 0.0157098194164746 }, { "epoch": 8.03, "grad_norm": 0.39085071585625647, "learning_rate": 2.6149467697969118e-06, "loss": 0.0225, "sft_loss": 0.002843447361374274, "step": 1345, "total_loss": 0.002981356151798309, "value_loss": 0.001379087858413186, "value_loss_search": 0.004844959436161389, "value_loss_thought": 0.006187743457485339 }, { "epoch": 8.06, "grad_norm": 0.48063989373141985, "learning_rate": 2.597540225420525e-06, "loss": 0.0226, "sft_loss": 0.002574404375627637, "step": 1350, "total_loss": 0.0028025899320653024, "value_loss": 0.0022818553584329493, "value_loss_search": 0.005695947931963019, "value_loss_thought": 0.012558894986273116 }, { "epoch": 8.09, "grad_norm": 0.4206131958945122, "learning_rate": 2.580128943929139e-06, "loss": 0.0212, "sft_loss": 0.002781625863281079, "step": 1355, "total_loss": 0.002963343672460894, "value_loss": 0.0018171783105231042, "value_loss_search": 0.008560673631276928, "value_loss_thought": 0.005976753031427506 }, { "epoch": 8.12, "grad_norm": 0.3653474778372023, "learning_rate": 2.5627137709148386e-06, "loss": 0.0225, "sft_loss": 0.0017891598748974503, "step": 1360, "total_loss": 0.001957193014266068, "value_loss": 0.0016803315540641962, "value_loss_search": 0.006167404261202591, "value_loss_thought": 0.007275248032328818 }, { "epoch": 8.15, "grad_norm": 0.43951845600689593, "learning_rate": 2.5452955521587064e-06, "loss": 0.0225, "sft_loss": 0.0016710011375835165, "step": 1365, "total_loss": 0.0018185687295726894, "value_loss": 0.0014756758409930626, "value_loss_search": 0.006119606431695956, "value_loss_thought": 0.005685800284209108 }, { "epoch": 8.18, "grad_norm": 0.45317276348714697, "learning_rate": 2.5278751335897423e-06, "loss": 0.0238, "sft_loss": 0.0029196401010267437, "step": 1370, "total_loss": 0.0030648296158460654, "value_loss": 0.0014518950408728415, "value_loss_search": 0.0060932497550993505, "value_loss_thought": 0.005521910469178692 }, { "epoch": 8.21, "grad_norm": 0.3857977796232688, "learning_rate": 2.5104533612437816e-06, "loss": 0.0203, "sft_loss": 0.002657680620905012, "step": 1375, "total_loss": 0.0027829522318313592, "value_loss": 0.00125271600504675, "value_loss_search": 0.004111475246963892, "value_loss_thought": 0.005910252820331152 }, { "epoch": 8.24, "grad_norm": 0.43983223503478724, "learning_rate": 2.493031081222406e-06, "loss": 0.0214, "sft_loss": 0.002025950566167012, "step": 1380, "total_loss": 0.0021369275005781673, "value_loss": 0.0011097693309352508, "value_loss_search": 0.003321273262153568, "value_loss_thought": 0.005556881445545514 }, { "epoch": 8.27, "grad_norm": 0.3626207865464384, "learning_rate": 2.475609139651855e-06, "loss": 0.0208, "sft_loss": 0.002262994254124351, "step": 1385, "total_loss": 0.0023927846590680703, "value_loss": 0.0012979039850506524, "value_loss_search": 0.005410684119317466, "value_loss_thought": 0.004972547819033934 }, { "epoch": 8.3, "grad_norm": 0.5204196278274228, "learning_rate": 2.4581883826419294e-06, "loss": 0.0238, "sft_loss": 0.0023898789659142494, "step": 1390, "total_loss": 0.0025361195974028306, "value_loss": 0.0014624062704569952, "value_loss_search": 0.0058827654516449, "value_loss_thought": 0.005816484717388448 }, { "epoch": 8.33, "grad_norm": 0.45142759026857865, "learning_rate": 2.4407696562449006e-06, "loss": 0.0209, "sft_loss": 0.0020229626476066186, "step": 1395, "total_loss": 0.0021993215879206216, "value_loss": 0.0017635896525462157, "value_loss_search": 0.006506465665435712, "value_loss_thought": 0.007602251682465066 }, { "epoch": 8.36, "grad_norm": 0.6201398131026475, "learning_rate": 2.4233538064144226e-06, "loss": 0.0214, "sft_loss": 0.002320754388347268, "step": 1400, "total_loss": 0.0024575029708927333, "value_loss": 0.0013674858636591124, "value_loss_search": 0.004308351113706976, "value_loss_thought": 0.006631535803444421 }, { "epoch": 8.38, "grad_norm": 0.45220431536451633, "learning_rate": 2.4059416789644473e-06, "loss": 0.0224, "sft_loss": 0.0025798780581681056, "step": 1405, "total_loss": 0.00268079744263332, "value_loss": 0.0010091937743709422, "value_loss_search": 0.004291023600296739, "value_loss_thought": 0.0037825266010713676 }, { "epoch": 8.41, "grad_norm": 0.45477594831619567, "learning_rate": 2.388534119528145e-06, "loss": 0.0195, "sft_loss": 0.001965572632616386, "step": 1410, "total_loss": 0.0021669458707378906, "value_loss": 0.002013732181683281, "value_loss_search": 0.006261610782757998, "value_loss_thought": 0.009848246640262914 }, { "epoch": 8.44, "grad_norm": 0.4218999322836366, "learning_rate": 2.3711319735168378e-06, "loss": 0.0219, "sft_loss": 0.002767064847284928, "step": 1415, "total_loss": 0.0029677543869524926, "value_loss": 0.0020068954641317303, "value_loss_search": 0.006902593411530234, "value_loss_thought": 0.00915257024798848 }, { "epoch": 8.47, "grad_norm": 0.39469190101455454, "learning_rate": 2.353736086078941e-06, "loss": 0.0219, "sft_loss": 0.00205565721844323, "step": 1420, "total_loss": 0.0023155218750730453, "value_loss": 0.0025986466305312206, "value_loss_search": 0.009201484014715789, "value_loss_thought": 0.011587689043972204 }, { "epoch": 8.5, "grad_norm": 0.3222692938718468, "learning_rate": 2.336347302058916e-06, "loss": 0.0231, "sft_loss": 0.0034793566446751356, "step": 1425, "total_loss": 0.00360437715615376, "value_loss": 0.0012502055355525954, "value_loss_search": 0.004974942305875629, "value_loss_thought": 0.005026701947775792 }, { "epoch": 8.53, "grad_norm": 0.4599366779305295, "learning_rate": 2.3189664659562442e-06, "loss": 0.024, "sft_loss": 0.002877801636350341, "step": 1430, "total_loss": 0.003031013450221565, "value_loss": 0.0015321182537206823, "value_loss_search": 0.00576030847768152, "value_loss_thought": 0.006496637430245755 }, { "epoch": 8.56, "grad_norm": 0.4315667432642988, "learning_rate": 2.3015944218844063e-06, "loss": 0.022, "sft_loss": 0.002819139277562499, "step": 1435, "total_loss": 0.002962993244301515, "value_loss": 0.0014385397402392642, "value_loss_search": 0.005644433948407368, "value_loss_thought": 0.005863883913934842 }, { "epoch": 8.59, "grad_norm": 0.4745674858393937, "learning_rate": 2.2842320135298946e-06, "loss": 0.0229, "sft_loss": 0.002344584878301248, "step": 1440, "total_loss": 0.0024817303616316622, "value_loss": 0.0013714549205573689, "value_loss_search": 0.006260404985391687, "value_loss_thought": 0.004711234440060252 }, { "epoch": 8.62, "grad_norm": 0.40694918242518996, "learning_rate": 2.2668800841112345e-06, "loss": 0.0229, "sft_loss": 0.00296173918468412, "step": 1445, "total_loss": 0.0030806214503627414, "value_loss": 0.0011888226746123109, "value_loss_search": 0.004837690460476551, "value_loss_thought": 0.004672890894835291 }, { "epoch": 8.65, "grad_norm": 0.4107733649493513, "learning_rate": 2.2495394763380338e-06, "loss": 0.0225, "sft_loss": 0.003584610787220299, "step": 1450, "total_loss": 0.0038719090720064743, "value_loss": 0.0028729828365271714, "value_loss_search": 0.005230140845671372, "value_loss_thought": 0.017753721810265688 }, { "epoch": 8.68, "grad_norm": 0.3728305802169862, "learning_rate": 2.232211032370057e-06, "loss": 0.0227, "sft_loss": 0.0025901119457557797, "step": 1455, "total_loss": 0.0027724159214699284, "value_loss": 0.0018230398026389595, "value_loss_search": 0.006323750824310537, "value_loss_thought": 0.008260567580418866 }, { "epoch": 8.71, "grad_norm": 0.43244392951704247, "learning_rate": 2.2148955937763215e-06, "loss": 0.0202, "sft_loss": 0.0023802727228030562, "step": 1460, "total_loss": 0.0025262993830239113, "value_loss": 0.0014602663856294385, "value_loss_search": 0.004974742010833211, "value_loss_thought": 0.006707389143184628 }, { "epoch": 8.74, "grad_norm": 0.446983159592719, "learning_rate": 2.197594001494232e-06, "loss": 0.0231, "sft_loss": 0.00247747907997109, "step": 1465, "total_loss": 0.0028393455249869247, "value_loss": 0.0036186647702152186, "value_loss_search": 0.005167914255184769, "value_loss_thought": 0.02378140405708109 }, { "epoch": 8.77, "grad_norm": 0.582681357981295, "learning_rate": 2.1803070957887348e-06, "loss": 0.0232, "sft_loss": 0.0029129542876034976, "step": 1470, "total_loss": 0.0030655652646260022, "value_loss": 0.0015261098120788574, "value_loss_search": 0.006358572702845322, "value_loss_thought": 0.005850305858075444 }, { "epoch": 8.8, "grad_norm": 0.4700551072376061, "learning_rate": 2.1630357162115133e-06, "loss": 0.0219, "sft_loss": 0.002174633409595117, "step": 1475, "total_loss": 0.002347504053091143, "value_loss": 0.0017287064572656164, "value_loss_search": 0.008121828264177112, "value_loss_thought": 0.005707823473858298 }, { "epoch": 8.83, "grad_norm": 0.7663496628024437, "learning_rate": 2.1457807015602086e-06, "loss": 0.0234, "sft_loss": 0.0025546713673975318, "step": 1480, "total_loss": 0.00283914315147058, "value_loss": 0.002844717770221905, "value_loss_search": 0.011075520714871345, "value_loss_thought": 0.011682221261048653 }, { "epoch": 8.86, "grad_norm": 0.3411989638023231, "learning_rate": 2.1285428898376907e-06, "loss": 0.0218, "sft_loss": 0.0021655169257428497, "step": 1485, "total_loss": 0.0023130710998373162, "value_loss": 0.0014755416389562015, "value_loss_search": 0.005575540512140265, "value_loss_thought": 0.006228792705860542 }, { "epoch": 8.89, "grad_norm": 0.4283075171896123, "learning_rate": 2.1113231182113557e-06, "loss": 0.0226, "sft_loss": 0.002557673762203194, "step": 1490, "total_loss": 0.002811015537105277, "value_loss": 0.002533417688255213, "value_loss_search": 0.006894818281846682, "value_loss_thought": 0.013372523540647307 }, { "epoch": 8.92, "grad_norm": 0.38025999762981544, "learning_rate": 2.0941222229724683e-06, "loss": 0.0195, "sft_loss": 0.002423516203998588, "step": 1495, "total_loss": 0.0026445118043511686, "value_loss": 0.002209956094793597, "value_loss_search": 0.007773246503208498, "value_loss_thought": 0.00990640217037111 }, { "epoch": 8.95, "grad_norm": 0.4107993684483595, "learning_rate": 2.076941039495545e-06, "loss": 0.023, "sft_loss": 0.002852113952394575, "step": 1500, "total_loss": 0.003029771816866855, "value_loss": 0.0017765785493793374, "value_loss_search": 0.006161500808036635, "value_loss_thought": 0.008051127563931004 }, { "epoch": 8.98, "grad_norm": 0.43206266155631295, "learning_rate": 2.05978040219779e-06, "loss": 0.0219, "sft_loss": 0.0023770652449456977, "step": 1505, "total_loss": 0.002536764156579352, "value_loss": 0.0015969893091551056, "value_loss_search": 0.0054944734949231135, "value_loss_thought": 0.007281441086252016 }, { "epoch": 9.01, "grad_norm": 0.374824172192575, "learning_rate": 2.0426411444985622e-06, "loss": 0.0212, "sft_loss": 0.0025530525454087183, "step": 1510, "total_loss": 0.0026753786481890527, "value_loss": 0.0012232610420596756, "value_loss_search": 0.004044807156776642, "value_loss_thought": 0.005741281189693836 }, { "epoch": 9.04, "grad_norm": 0.5103310551708304, "learning_rate": 2.0255240987789077e-06, "loss": 0.017, "sft_loss": 0.002143319571041502, "step": 1515, "total_loss": 0.0022470162215711296, "value_loss": 0.001036966439151854, "value_loss_search": 0.004559061944007681, "value_loss_thought": 0.0037366695483342484 }, { "epoch": 9.07, "grad_norm": 0.41407771047339026, "learning_rate": 2.008430096341129e-06, "loss": 0.0165, "sft_loss": 0.00211839419498574, "step": 1520, "total_loss": 0.002260489938532828, "value_loss": 0.0014209576240091337, "value_loss_search": 0.006807764833172314, "value_loss_thought": 0.004559896182763623 }, { "epoch": 9.1, "grad_norm": 0.3998377123999996, "learning_rate": 1.991359967368416e-06, "loss": 0.0167, "sft_loss": 0.0017885153938550502, "step": 1525, "total_loss": 0.0019043610838139103, "value_loss": 0.001158456964958532, "value_loss_search": 0.004520953930921223, "value_loss_thought": 0.004746701816475252 }, { "epoch": 9.13, "grad_norm": 0.25993528776663594, "learning_rate": 1.974314540884522e-06, "loss": 0.0168, "sft_loss": 0.0022823128500021996, "step": 1530, "total_loss": 0.0023883844661270357, "value_loss": 0.0010607163117128948, "value_loss_search": 0.003205415800539413, "value_loss_thought": 0.0052803147056920356 }, { "epoch": 9.16, "grad_norm": 0.28607830106364285, "learning_rate": 1.9572946447135087e-06, "loss": 0.017, "sft_loss": 0.0020253795781172814, "step": 1535, "total_loss": 0.002180098254166296, "value_loss": 0.0015471866376628896, "value_loss_search": 0.005289423008980521, "value_loss_thought": 0.007088070128247637 }, { "epoch": 9.19, "grad_norm": 0.36665251321589426, "learning_rate": 1.9403011054395372e-06, "loss": 0.0176, "sft_loss": 0.0020406075345817953, "step": 1540, "total_loss": 0.00214596866593979, "value_loss": 0.0010536111770306888, "value_loss_search": 0.003708674301128667, "value_loss_thought": 0.004720214986241445 }, { "epoch": 9.22, "grad_norm": 0.33701179906071765, "learning_rate": 1.923334748366727e-06, "loss": 0.0164, "sft_loss": 0.0018277755152666941, "step": 1545, "total_loss": 0.001965286196420379, "value_loss": 0.0013751067914199665, "value_loss_search": 0.0051922910111670715, "value_loss_thought": 0.005808563233449604 }, { "epoch": 9.25, "grad_norm": 0.30304308100827854, "learning_rate": 1.9063963974790715e-06, "loss": 0.0171, "sft_loss": 0.0021657033037627118, "step": 1550, "total_loss": 0.0022887821434451894, "value_loss": 0.0012307884190931873, "value_loss_search": 0.004165462106902851, "value_loss_thought": 0.005680845198685347 }, { "epoch": 9.28, "grad_norm": 0.348110330157348, "learning_rate": 1.8894868754004247e-06, "loss": 0.0168, "sft_loss": 0.002240948341204785, "step": 1555, "total_loss": 0.002330947991924148, "value_loss": 0.000899996584598739, "value_loss_search": 0.0027729876618877826, "value_loss_thought": 0.00442698502301937 }, { "epoch": 9.31, "grad_norm": 0.3510042973090616, "learning_rate": 1.8726070033545468e-06, "loss": 0.0176, "sft_loss": 0.0018620806687977165, "step": 1560, "total_loss": 0.0019822285716736944, "value_loss": 0.0012014789214504162, "value_loss_search": 0.0045417374156954795, "value_loss_thought": 0.005070093995891511 }, { "epoch": 9.34, "grad_norm": 0.2988369790384324, "learning_rate": 1.855757601125221e-06, "loss": 0.0168, "sft_loss": 0.0019012396631296724, "step": 1565, "total_loss": 0.0020716833577807845, "value_loss": 0.001704436970885581, "value_loss_search": 0.006682152269536346, "value_loss_thought": 0.006953343548593694 }, { "epoch": 9.37, "grad_norm": 0.4000228004615745, "learning_rate": 1.8389394870164418e-06, "loss": 0.0174, "sft_loss": 0.0017132473614765332, "step": 1570, "total_loss": 0.0018083037684050395, "value_loss": 0.0009505639430244627, "value_loss_search": 0.004880918659534927, "value_loss_thought": 0.0027235929035441587 }, { "epoch": 9.4, "grad_norm": 0.3335584079653667, "learning_rate": 1.8221534778126712e-06, "loss": 0.016, "sft_loss": 0.0019671204237965865, "step": 1575, "total_loss": 0.0020535163486698595, "value_loss": 0.0008639591912242394, "value_loss_search": 0.0034627173671083256, "value_loss_thought": 0.003448956180272944 }, { "epoch": 9.43, "grad_norm": 0.3026400110385097, "learning_rate": 1.8054003887391727e-06, "loss": 0.0166, "sft_loss": 0.002014622194110416, "step": 1580, "total_loss": 0.0021511696702731344, "value_loss": 0.001365474661497501, "value_loss_search": 0.004294166664863042, "value_loss_thought": 0.006629630598598624 }, { "epoch": 9.46, "grad_norm": 0.43139165457443246, "learning_rate": 1.7886810334224192e-06, "loss": 0.0163, "sft_loss": 0.0021359879698138683, "step": 1585, "total_loss": 0.0022444051660613696, "value_loss": 0.0010841718215033325, "value_loss_search": 0.004590211787228782, "value_loss_thought": 0.004083162726351475 }, { "epoch": 9.49, "grad_norm": 0.34009988215077, "learning_rate": 1.7719962238505779e-06, "loss": 0.0166, "sft_loss": 0.002009772404562682, "step": 1590, "total_loss": 0.0022419645182367278, "value_loss": 0.0023219212426283777, "value_loss_search": 0.0064497248539396425, "value_loss_thought": 0.01212564545467103 }, { "epoch": 9.52, "grad_norm": 0.29633516039709396, "learning_rate": 1.7553467703340755e-06, "loss": 0.017, "sft_loss": 0.0015611476090271025, "step": 1595, "total_loss": 0.0016733453244299312, "value_loss": 0.0011219771564128678, "value_loss_search": 0.0053791521318601095, "value_loss_thought": 0.0035966651016906327 }, { "epoch": 9.55, "grad_norm": 0.38962016205008826, "learning_rate": 1.7387334814662452e-06, "loss": 0.0168, "sft_loss": 0.002254475053632632, "step": 1600, "total_loss": 0.0023654911761866516, "value_loss": 0.0011101611622962083, "value_loss_search": 0.004612163749004594, "value_loss_thought": 0.004269125514247208 }, { "epoch": 9.58, "grad_norm": 0.38388365774795874, "learning_rate": 1.7221571640840562e-06, "loss": 0.0176, "sft_loss": 0.0018735320656560362, "step": 1605, "total_loss": 0.0019748406046375066, "value_loss": 0.0010130854097496922, "value_loss_search": 0.003405629392977971, "value_loss_thought": 0.004699053899685168 }, { "epoch": 9.61, "grad_norm": 0.4095939751375467, "learning_rate": 1.7056186232289298e-06, "loss": 0.0166, "sft_loss": 0.0022918267059139907, "step": 1610, "total_loss": 0.0024106179324689947, "value_loss": 0.001187912198918184, "value_loss_search": 0.004366835134328539, "value_loss_thought": 0.005136462485006632 }, { "epoch": 9.64, "grad_norm": 0.3302901304739707, "learning_rate": 1.6891186621076433e-06, "loss": 0.0186, "sft_loss": 0.002024157461710274, "step": 1615, "total_loss": 0.0021383855524845785, "value_loss": 0.001142280875455981, "value_loss_search": 0.004602078883806371, "value_loss_thought": 0.004536168186041323 }, { "epoch": 9.67, "grad_norm": 0.3679502879592282, "learning_rate": 1.6726580820533155e-06, "loss": 0.0159, "sft_loss": 0.0017695592978270724, "step": 1620, "total_loss": 0.0018712303529298425, "value_loss": 0.0010167105092577344, "value_loss_search": 0.003538572788136207, "value_loss_thought": 0.004595111271009955 }, { "epoch": 9.7, "grad_norm": 0.30533989281219753, "learning_rate": 1.6562376824864985e-06, "loss": 0.0166, "sft_loss": 0.0025343591201817616, "step": 1625, "total_loss": 0.002648697585539139, "value_loss": 0.0011433845557576206, "value_loss_search": 0.004449716299927786, "value_loss_thought": 0.004697360047975963 }, { "epoch": 9.73, "grad_norm": 0.2877518184615656, "learning_rate": 1.6398582608763457e-06, "loss": 0.0179, "sft_loss": 0.0024212473537772892, "step": 1630, "total_loss": 0.0025349426502771165, "value_loss": 0.0011369530704428144, "value_loss_search": 0.0051415300209441735, "value_loss_thought": 0.003954094471146164 }, { "epoch": 9.76, "grad_norm": 0.36027684134993015, "learning_rate": 1.6235206127018865e-06, "loss": 0.016, "sft_loss": 0.002206899574957788, "step": 1635, "total_loss": 0.0022886144271467403, "value_loss": 0.000817148587839256, "value_loss_search": 0.003036996565958816, "value_loss_thought": 0.003500192180490558 }, { "epoch": 9.79, "grad_norm": 0.35076644651015965, "learning_rate": 1.6072255314133921e-06, "loss": 0.0173, "sft_loss": 0.0019144602090818807, "step": 1640, "total_loss": 0.002069194418021425, "value_loss": 0.0015473420381795222, "value_loss_search": 0.005170887146491054, "value_loss_thought": 0.007207849150842094 }, { "epoch": 9.82, "grad_norm": 0.30979208746416814, "learning_rate": 1.5909738083938387e-06, "loss": 0.0181, "sft_loss": 0.0023195294284960254, "step": 1645, "total_loss": 0.002506226720114313, "value_loss": 0.0018669727418881622, "value_loss_search": 0.004914225066238486, "value_loss_thought": 0.010021556961669375 }, { "epoch": 9.85, "grad_norm": 0.3414912343606952, "learning_rate": 1.5747662329204758e-06, "loss": 0.0164, "sft_loss": 0.0017500042042229325, "step": 1650, "total_loss": 0.0018422375416491832, "value_loss": 0.0009223333461136462, "value_loss_search": 0.003468296695177742, "value_loss_thought": 0.003910370041808164 }, { "epoch": 9.88, "grad_norm": 0.34143690574927327, "learning_rate": 1.5586035921264952e-06, "loss": 0.0167, "sft_loss": 0.001885048404801637, "step": 1655, "total_loss": 0.0019954120705705236, "value_loss": 0.001103636745813219, "value_loss_search": 0.004034641608018319, "value_loss_thought": 0.004794452414535045 }, { "epoch": 9.91, "grad_norm": 0.3940780464860908, "learning_rate": 1.5424866709628018e-06, "loss": 0.0167, "sft_loss": 0.0023443336365744473, "step": 1660, "total_loss": 0.0025089123803297753, "value_loss": 0.001645787319603187, "value_loss_search": 0.005139281411049979, "value_loss_thought": 0.008027016961932532 }, { "epoch": 9.94, "grad_norm": 0.28513052875466016, "learning_rate": 1.5264162521598893e-06, "loss": 0.017, "sft_loss": 0.002210353355621919, "step": 1665, "total_loss": 0.0023033933971760233, "value_loss": 0.0009304003870511223, "value_loss_search": 0.004236812041176563, "value_loss_thought": 0.00320639110132106 }, { "epoch": 9.97, "grad_norm": 0.6554785763050915, "learning_rate": 1.5103931161898321e-06, "loss": 0.017, "sft_loss": 0.001787349657388404, "step": 1670, "total_loss": 0.0019071295019472245, "value_loss": 0.0011977983157066773, "value_loss_search": 0.003837722380649211, "value_loss_thought": 0.0057446641365231695 }, { "epoch": 10.0, "grad_norm": 0.47343410940009084, "learning_rate": 1.4944180412283765e-06, "loss": 0.0173, "sft_loss": 0.0018310324900085106, "step": 1675, "total_loss": 0.001928851098625728, "value_loss": 0.0009781861556859895, "value_loss_search": 0.00370205105889454, "value_loss_thought": 0.004123438105875721 }, { "epoch": 10.03, "grad_norm": 0.3191231605103231, "learning_rate": 1.4784918031171507e-06, "loss": 0.0138, "sft_loss": 0.0015405306039610878, "step": 1680, "total_loss": 0.0016421698385045147, "value_loss": 0.0010163923268578401, "value_loss_search": 0.0045566821872171205, "value_loss_thought": 0.003574456366766299 }, { "epoch": 10.06, "grad_norm": 0.19891894504058574, "learning_rate": 1.4626151753259826e-06, "loss": 0.0138, "sft_loss": 0.001970075577264652, "step": 1685, "total_loss": 0.0020514145978665966, "value_loss": 0.0008133902773010959, "value_loss_search": 0.0033379198979332616, "value_loss_thought": 0.003169202328041365 }, { "epoch": 10.09, "grad_norm": 0.2844490093173974, "learning_rate": 1.4467889289153372e-06, "loss": 0.0132, "sft_loss": 0.0011790773802204057, "step": 1690, "total_loss": 0.0012578035701309887, "value_loss": 0.0007872618921737739, "value_loss_search": 0.0035334496231712365, "value_loss_thought": 0.0027646455130707183 }, { "epoch": 10.12, "grad_norm": 0.22486418307955425, "learning_rate": 1.4310138324988727e-06, "loss": 0.0132, "sft_loss": 0.0016656344232615083, "step": 1695, "total_loss": 0.0017502345744446757, "value_loss": 0.0008460015417313116, "value_loss_search": 0.0036587552590958694, "value_loss_thought": 0.0031092570511646045 }, { "epoch": 10.15, "grad_norm": 0.2753979527731448, "learning_rate": 1.415290652206105e-06, "loss": 0.0143, "sft_loss": 0.0017361613281536847, "step": 1700, "total_loss": 0.0018177182257247183, "value_loss": 0.0008155690053840203, "value_loss_search": 0.003209375508242829, "value_loss_thought": 0.003315176499722838 }, { "epoch": 10.18, "grad_norm": 0.2572570727278164, "learning_rate": 1.3996201516452062e-06, "loss": 0.0137, "sft_loss": 0.001388478121953085, "step": 1705, "total_loss": 0.0014834851837221663, "value_loss": 0.0009500706352980615, "value_loss_search": 0.0030097221551613983, "value_loss_thought": 0.004590843019104796 }, { "epoch": 10.21, "grad_norm": 0.27025436045593615, "learning_rate": 1.3840030918659174e-06, "loss": 0.0147, "sft_loss": 0.0013848344882717357, "step": 1710, "total_loss": 0.0014918723345459738, "value_loss": 0.0010703784395445838, "value_loss_search": 0.0053198799042775136, "value_loss_thought": 0.0032431475258817956 }, { "epoch": 10.23, "grad_norm": 0.2813750257238534, "learning_rate": 1.3684402313225858e-06, "loss": 0.014, "sft_loss": 0.0020835736999288202, "step": 1715, "total_loss": 0.002167087908361509, "value_loss": 0.0008351421248391944, "value_loss_search": 0.003797487074473338, "value_loss_thought": 0.0028836499267299587 }, { "epoch": 10.26, "grad_norm": 0.23513053578061044, "learning_rate": 1.3529323258373347e-06, "loss": 0.0156, "sft_loss": 0.001715753084863536, "step": 1720, "total_loss": 0.0017993840303944352, "value_loss": 0.0008363092606941791, "value_loss_search": 0.0030788765871307077, "value_loss_thought": 0.003611597566623459 }, { "epoch": 10.29, "grad_norm": 0.23227494152180417, "learning_rate": 1.3374801285633498e-06, "loss": 0.014, "sft_loss": 0.001370953200967051, "step": 1725, "total_loss": 0.0014732162911762713, "value_loss": 0.0010226307487414487, "value_loss_search": 0.0036572990339521993, "value_loss_thought": 0.004523746974246024 }, { "epoch": 10.32, "grad_norm": 0.27072849780059804, "learning_rate": 1.3220843899483093e-06, "loss": 0.0132, "sft_loss": 0.002192886942066252, "step": 1730, "total_loss": 0.0022738821006910827, "value_loss": 0.0008099516685206254, "value_loss_search": 0.0027381081928751884, "value_loss_thought": 0.0037415051008338196 }, { "epoch": 10.35, "grad_norm": 0.23754108921707667, "learning_rate": 1.3067458576979305e-06, "loss": 0.0137, "sft_loss": 0.0013820198219036683, "step": 1735, "total_loss": 0.0014648040350152768, "value_loss": 0.0008278420881822513, "value_loss_search": 0.002561309584873328, "value_loss_thought": 0.004061427062742951 }, { "epoch": 10.38, "grad_norm": 0.36161029888838353, "learning_rate": 1.2914652767396602e-06, "loss": 0.0134, "sft_loss": 0.0016668380645569413, "step": 1740, "total_loss": 0.0017722387631607718, "value_loss": 0.0010540070087955654, "value_loss_search": 0.003300642109496721, "value_loss_thought": 0.005131414054389882 }, { "epoch": 10.41, "grad_norm": 0.2974708087969646, "learning_rate": 1.2762433891865e-06, "loss": 0.0143, "sft_loss": 0.0014268789207562804, "step": 1745, "total_loss": 0.0015154178811258134, "value_loss": 0.0008853895097672648, "value_loss_search": 0.0027578251025261124, "value_loss_thought": 0.004325291005079635 }, { "epoch": 10.44, "grad_norm": 0.3676755156415561, "learning_rate": 1.2610809343009588e-06, "loss": 0.0153, "sft_loss": 0.0014077324420213699, "step": 1750, "total_loss": 0.0015097191839743118, "value_loss": 0.001019867208327696, "value_loss_search": 0.003453613588283133, "value_loss_thought": 0.004705324086262408 }, { "epoch": 10.47, "grad_norm": 0.35317661644500653, "learning_rate": 1.2459786484591535e-06, "loss": 0.0138, "sft_loss": 0.0014058848028071225, "step": 1755, "total_loss": 0.0014983110793011178, "value_loss": 0.0009242627733442532, "value_loss_search": 0.0036805602864205867, "value_loss_thought": 0.0037135419420565086 }, { "epoch": 10.5, "grad_norm": 0.3090797575849808, "learning_rate": 1.2309372651150456e-06, "loss": 0.0143, "sft_loss": 0.0013164847245207057, "step": 1760, "total_loss": 0.0014013042361533223, "value_loss": 0.0008481952301963247, "value_loss_search": 0.0033867947210950433, "value_loss_thought": 0.0033987670644137326 }, { "epoch": 10.53, "grad_norm": 0.3792937092146306, "learning_rate": 1.2159575147648226e-06, "loss": 0.0138, "sft_loss": 0.0010253429325530305, "step": 1765, "total_loss": 0.0011080630618408803, "value_loss": 0.0008272012292422914, "value_loss_search": 0.003076424640460118, "value_loss_thought": 0.0035411851821436358 }, { "epoch": 10.56, "grad_norm": 0.23588251371295496, "learning_rate": 1.2010401249114166e-06, "loss": 0.0154, "sft_loss": 0.002121089934371412, "step": 1770, "total_loss": 0.002200343585804987, "value_loss": 0.0007925363796857709, "value_loss_search": 0.0028656800206931623, "value_loss_thought": 0.0034746110001947273 }, { "epoch": 10.59, "grad_norm": 0.18704537996101944, "learning_rate": 1.1861858200291754e-06, "loss": 0.015, "sft_loss": 0.001497958108666353, "step": 1775, "total_loss": 0.0015780455702179453, "value_loss": 0.0008008746483028518, "value_loss_search": 0.003131278493640366, "value_loss_thought": 0.0032757186703292972 }, { "epoch": 10.62, "grad_norm": 0.2611143768462882, "learning_rate": 1.1713953215286786e-06, "loss": 0.0146, "sft_loss": 0.0016566726000746713, "step": 1780, "total_loss": 0.0017335941357572437, "value_loss": 0.000769215394677758, "value_loss_search": 0.0035173230212080854, "value_loss_thought": 0.0026364001052343157 }, { "epoch": 10.65, "grad_norm": 0.22627379212344326, "learning_rate": 1.156669347721698e-06, "loss": 0.0139, "sft_loss": 0.0013101978547638282, "step": 1785, "total_loss": 0.0014014345401903937, "value_loss": 0.0009123669141331448, "value_loss_search": 0.0029923125522913095, "value_loss_thought": 0.004306622803687788 }, { "epoch": 10.68, "grad_norm": 0.2755772125737796, "learning_rate": 1.1420086137863187e-06, "loss": 0.0147, "sft_loss": 0.0017199999798322096, "step": 1790, "total_loss": 0.0018377338240952667, "value_loss": 0.0011773384410275866, "value_loss_search": 0.004846148737328804, "value_loss_thought": 0.004572558852294151 }, { "epoch": 10.71, "grad_norm": 0.32880460241811366, "learning_rate": 1.127413831732198e-06, "loss": 0.0139, "sft_loss": 0.001667250582249835, "step": 1795, "total_loss": 0.0017713467202923993, "value_loss": 0.0010409614306340132, "value_loss_search": 0.0038816104845068367, "value_loss_thought": 0.00444608086813787 }, { "epoch": 10.74, "grad_norm": 0.31078844068026645, "learning_rate": 1.1128857103659924e-06, "loss": 0.0148, "sft_loss": 0.0020905163779389112, "step": 1800, "total_loss": 0.0021922153910395536, "value_loss": 0.0010169901736389875, "value_loss_search": 0.0040109903552888685, "value_loss_thought": 0.004124930962098006 }, { "epoch": 10.77, "grad_norm": 0.25337131057461965, "learning_rate": 1.098424955256929e-06, "loss": 0.0146, "sft_loss": 0.0014486652828054503, "step": 1805, "total_loss": 0.0015391261362040609, "value_loss": 0.0009046085265993042, "value_loss_search": 0.0039972110742269255, "value_loss_thought": 0.0032396571875779046 }, { "epoch": 10.8, "grad_norm": 0.27541225586561385, "learning_rate": 1.084032268702546e-06, "loss": 0.016, "sft_loss": 0.0018587965198094026, "step": 1810, "total_loss": 0.0019315761407113995, "value_loss": 0.0007277963281012489, "value_loss_search": 0.002842529457825549, "value_loss_thought": 0.0029798412312629806 }, { "epoch": 10.83, "grad_norm": 0.3143782682726898, "learning_rate": 1.0697083496945766e-06, "loss": 0.0147, "sft_loss": 0.0016238773765508085, "step": 1815, "total_loss": 0.0017014974511752711, "value_loss": 0.0007762006878408556, "value_loss_search": 0.003306770018977545, "value_loss_thought": 0.002902835555346428 }, { "epoch": 10.86, "grad_norm": 0.20528023876815246, "learning_rate": 1.0554538938850067e-06, "loss": 0.0127, "sft_loss": 0.0010624434828059748, "step": 1820, "total_loss": 0.0011552063171336613, "value_loss": 0.0009276282694486327, "value_loss_search": 0.0039089625636478335, "value_loss_thought": 0.0035120635659950496 }, { "epoch": 10.89, "grad_norm": 0.3193867192070165, "learning_rate": 1.0412695935522915e-06, "loss": 0.0139, "sft_loss": 0.0015649513312382623, "step": 1825, "total_loss": 0.001644154928186481, "value_loss": 0.0007920360109665125, "value_loss_search": 0.003427566871346244, "value_loss_thought": 0.0029087212127251404 }, { "epoch": 10.92, "grad_norm": 0.2485596631221815, "learning_rate": 1.0271561375677295e-06, "loss": 0.0141, "sft_loss": 0.0012402831809595228, "step": 1830, "total_loss": 0.0013410342163723498, "value_loss": 0.0010075102685675574, "value_loss_search": 0.0032375619698996163, "value_loss_thought": 0.004822520132569253 }, { "epoch": 10.95, "grad_norm": 0.19629796573946343, "learning_rate": 1.0131142113620124e-06, "loss": 0.0145, "sft_loss": 0.0017903442290844395, "step": 1835, "total_loss": 0.0018783345309895338, "value_loss": 0.0008799031274634217, "value_loss_search": 0.002568782726038421, "value_loss_thought": 0.00447044234929308 }, { "epoch": 10.98, "grad_norm": 0.22665789788022606, "learning_rate": 9.991444968919318e-07, "loss": 0.016, "sft_loss": 0.001909774899831973, "step": 1840, "total_loss": 0.0020184893559246574, "value_loss": 0.0010871445186012353, "value_loss_search": 0.0036259381746958754, "value_loss_thought": 0.005071217814185047 }, { "epoch": 11.01, "grad_norm": 0.14121531206089535, "learning_rate": 9.85247672607262e-07, "loss": 0.0135, "sft_loss": 0.0015843365341424941, "step": 1845, "total_loss": 0.0016597445963839163, "value_loss": 0.0007540804324889904, "value_loss_search": 0.00248998072215727, "value_loss_thought": 0.0035426627241349705 }, { "epoch": 11.04, "grad_norm": 0.23904440865103438, "learning_rate": 9.714244134178111e-07, "loss": 0.0121, "sft_loss": 0.001681867046863772, "step": 1850, "total_loss": 0.0017365749472503466, "value_loss": 0.000547078986141969, "value_loss_search": 0.002276956337368574, "value_loss_thought": 0.0020996756104807446 }, { "epoch": 11.07, "grad_norm": 0.21369722290951315, "learning_rate": 9.576753906606406e-07, "loss": 0.0132, "sft_loss": 0.001991357470978983, "step": 1855, "total_loss": 0.0020535454949708763, "value_loss": 0.0006218802197111017, "value_loss_search": 0.002578609814440824, "value_loss_thought": 0.002396431967076751 }, { "epoch": 11.1, "grad_norm": 0.23349687558798843, "learning_rate": 9.440012720674669e-07, "loss": 0.0134, "sft_loss": 0.0016908970777876676, "step": 1860, "total_loss": 0.0017617013153142125, "value_loss": 0.0007080423307911588, "value_loss_search": 0.002591455921628949, "value_loss_thought": 0.003072882712245928 }, { "epoch": 11.13, "grad_norm": 0.2315106358683564, "learning_rate": 9.304027217322248e-07, "loss": 0.012, "sft_loss": 0.0011010443093255162, "step": 1865, "total_loss": 0.001180766790723453, "value_loss": 0.0007972249053409541, "value_loss_search": 0.0028063702089980323, "value_loss_thought": 0.003571429059320508 }, { "epoch": 11.16, "grad_norm": 0.19676507857751727, "learning_rate": 9.168804000788231e-07, "loss": 0.0134, "sft_loss": 0.0017542458925163373, "step": 1870, "total_loss": 0.0018465204406766134, "value_loss": 0.0009227454697338544, "value_loss_search": 0.0034655480293684705, "value_loss_thought": 0.003916415683534069 }, { "epoch": 11.19, "grad_norm": 0.20994293249210688, "learning_rate": 9.034349638290643e-07, "loss": 0.0129, "sft_loss": 0.0016363047616323456, "step": 1875, "total_loss": 0.001720267212360227, "value_loss": 0.000839624490978963, "value_loss_search": 0.0023668420385547506, "value_loss_thought": 0.004350153919847344 }, { "epoch": 11.22, "grad_norm": 0.17348271881719035, "learning_rate": 8.90067065970753e-07, "loss": 0.0117, "sft_loss": 0.0015204125025775283, "step": 1880, "total_loss": 0.00157785642841759, "value_loss": 0.0005744392913584306, "value_loss_search": 0.002059218621488412, "value_loss_thought": 0.0025362957071592973 }, { "epoch": 11.25, "grad_norm": 0.2244106678448663, "learning_rate": 8.767773557259856e-07, "loss": 0.0136, "sft_loss": 0.0018200392310973256, "step": 1885, "total_loss": 0.0018952935279571647, "value_loss": 0.0007525429511019866, "value_loss_search": 0.0032326324541486428, "value_loss_thought": 0.002787711161090556 }, { "epoch": 11.28, "grad_norm": 0.22278002454347204, "learning_rate": 8.635664785196149e-07, "loss": 0.0136, "sft_loss": 0.001562042610021308, "step": 1890, "total_loss": 0.0016376418375557479, "value_loss": 0.0007559922805285169, "value_loss_search": 0.002203879163846523, "value_loss_thought": 0.0038440591039261562 }, { "epoch": 11.31, "grad_norm": 0.16580402380208306, "learning_rate": 8.504350759479085e-07, "loss": 0.0132, "sft_loss": 0.001630876283161342, "step": 1895, "total_loss": 0.0017215145897331752, "value_loss": 0.0009063829461979367, "value_loss_search": 0.0032823619387357893, "value_loss_thought": 0.0039687016303560085 }, { "epoch": 11.34, "grad_norm": 0.1538178503850748, "learning_rate": 8.373837857473876e-07, "loss": 0.0124, "sft_loss": 0.0015245357761159539, "step": 1900, "total_loss": 0.001594149377508103, "value_loss": 0.0006961359632555286, "value_loss_search": 0.0029453484618215953, "value_loss_thought": 0.0026237392520670256 }, { "epoch": 11.37, "grad_norm": 0.22852979054995423, "learning_rate": 8.244132417638572e-07, "loss": 0.0139, "sft_loss": 0.0018581276090117171, "step": 1905, "total_loss": 0.0019236322099402514, "value_loss": 0.0006550459199843317, "value_loss_search": 0.002184331477405976, "value_loss_thought": 0.0030560358827074197 }, { "epoch": 11.4, "grad_norm": 0.17979282429716514, "learning_rate": 8.115240739216182e-07, "loss": 0.0135, "sft_loss": 0.0016053789819125085, "step": 1910, "total_loss": 0.0016754228590059484, "value_loss": 0.0007004386914104543, "value_loss_search": 0.0024565162927842723, "value_loss_thought": 0.003146993195559844 }, { "epoch": 11.43, "grad_norm": 0.2143766598366511, "learning_rate": 7.987169081928808e-07, "loss": 0.0113, "sft_loss": 0.0012951746117323636, "step": 1915, "total_loss": 0.0013838895008234432, "value_loss": 0.0008871489512898734, "value_loss_search": 0.0030084542974748276, "value_loss_thought": 0.004088737367703743 }, { "epoch": 11.46, "grad_norm": 0.16901429392294579, "learning_rate": 7.859923665673577e-07, "loss": 0.0138, "sft_loss": 0.0017941196507308631, "step": 1920, "total_loss": 0.0018495924976690502, "value_loss": 0.0005547285868487961, "value_loss_search": 0.002195003875567636, "value_loss_thought": 0.0022428248138567143 }, { "epoch": 11.49, "grad_norm": 0.14963676330146067, "learning_rate": 7.733510670220592e-07, "loss": 0.0127, "sft_loss": 0.0016209140827413647, "step": 1925, "total_loss": 0.0016745970182368453, "value_loss": 0.0005368294205496226, "value_loss_search": 0.0021613026585526997, "value_loss_thought": 0.0021333327193019612 }, { "epoch": 11.52, "grad_norm": 0.1560856898990678, "learning_rate": 7.607936234912841e-07, "loss": 0.0128, "sft_loss": 0.0012635959719773382, "step": 1930, "total_loss": 0.0013598649700924171, "value_loss": 0.0009626899960949231, "value_loss_search": 0.003141054221191553, "value_loss_thought": 0.004560465800022939 }, { "epoch": 11.55, "grad_norm": 0.1963172359932867, "learning_rate": 7.48320645836797e-07, "loss": 0.013, "sft_loss": 0.001730994725949131, "step": 1935, "total_loss": 0.0018184438404333036, "value_loss": 0.0008744911316171056, "value_loss_search": 0.0031509772675512695, "value_loss_thought": 0.003844951791325002 }, { "epoch": 11.58, "grad_norm": 0.2668333028531531, "learning_rate": 7.359327398182145e-07, "loss": 0.0129, "sft_loss": 0.0013168820936698467, "step": 1940, "total_loss": 0.0013926543368199874, "value_loss": 0.0007577225714385349, "value_loss_search": 0.003037126008916857, "value_loss_thought": 0.003024654608429955 }, { "epoch": 11.61, "grad_norm": 0.222868660291957, "learning_rate": 7.236305070635835e-07, "loss": 0.0143, "sft_loss": 0.0016936144500505179, "step": 1945, "total_loss": 0.0017581287969392178, "value_loss": 0.0006451433262554928, "value_loss_search": 0.0023534947292660037, "value_loss_thought": 0.002807651879811601 }, { "epoch": 11.64, "grad_norm": 0.361284370578401, "learning_rate": 7.114145450401666e-07, "loss": 0.0128, "sft_loss": 0.001547012195806019, "step": 1950, "total_loss": 0.001618750787525869, "value_loss": 0.000717385778938251, "value_loss_search": 0.0024508829355568197, "value_loss_thought": 0.0032882033142414002 }, { "epoch": 11.67, "grad_norm": 0.13552700047134153, "learning_rate": 6.992854470254207e-07, "loss": 0.013, "sft_loss": 0.001385022871545516, "step": 1955, "total_loss": 0.0014542039817172282, "value_loss": 0.0006918109331763844, "value_loss_search": 0.0027121378843503407, "value_loss_thought": 0.0028223496028431327 }, { "epoch": 11.7, "grad_norm": 0.22110946390490138, "learning_rate": 6.872438020781855e-07, "loss": 0.0133, "sft_loss": 0.0019112714886432513, "step": 1960, "total_loss": 0.0021918389610931397, "value_loss": 0.002805674577484751, "value_loss_search": 0.0025011180584272098, "value_loss_thought": 0.01994427887461825 }, { "epoch": 11.73, "grad_norm": 0.19717560920606997, "learning_rate": 6.752901950100796e-07, "loss": 0.0135, "sft_loss": 0.0014557878545019775, "step": 1965, "total_loss": 0.0015295138876126658, "value_loss": 0.0007372604449756182, "value_loss_search": 0.0031169248416858864, "value_loss_thought": 0.0027811587181304277 }, { "epoch": 11.76, "grad_norm": 0.19011787464804747, "learning_rate": 6.634252063570909e-07, "loss": 0.0133, "sft_loss": 0.0018082802678691223, "step": 1970, "total_loss": 0.0018666912741366558, "value_loss": 0.0005841100078200157, "value_loss_search": 0.0022486982484906546, "value_loss_thought": 0.0024241818446625986 }, { "epoch": 11.79, "grad_norm": 0.1689996116493542, "learning_rate": 6.516494123513911e-07, "loss": 0.0121, "sft_loss": 0.0014497063646558672, "step": 1975, "total_loss": 0.0015385708714291014, "value_loss": 0.0008886449425062892, "value_loss_search": 0.0024655752657849915, "value_loss_thought": 0.0046435843172275785 }, { "epoch": 11.82, "grad_norm": 0.15943109781856352, "learning_rate": 6.399633848933434e-07, "loss": 0.0133, "sft_loss": 0.0014925144583685323, "step": 1980, "total_loss": 0.0015526211048978666, "value_loss": 0.0006010664543737221, "value_loss_search": 0.0027871506780570597, "value_loss_thought": 0.0020213810148561606 }, { "epoch": 11.85, "grad_norm": 0.16717165760347577, "learning_rate": 6.283676915237307e-07, "loss": 0.013, "sft_loss": 0.001696960357367061, "step": 1985, "total_loss": 0.0017603883717299596, "value_loss": 0.0006342800129914394, "value_loss_search": 0.002824323731066869, "value_loss_thought": 0.0022499163406337177 }, { "epoch": 11.88, "grad_norm": 0.15142680254994328, "learning_rate": 6.16862895396193e-07, "loss": 0.0139, "sft_loss": 0.001581608026754111, "step": 1990, "total_loss": 0.0016601021626520662, "value_loss": 0.0007849412620771545, "value_loss_search": 0.002948937653894745, "value_loss_thought": 0.003330592447127856 }, { "epoch": 11.91, "grad_norm": 0.19276901421529766, "learning_rate": 6.054495552498779e-07, "loss": 0.0137, "sft_loss": 0.0018120755994459614, "step": 1995, "total_loss": 0.001886463062464827, "value_loss": 0.0007438746942170837, "value_loss_search": 0.0030197886319228927, "value_loss_thought": 0.0029312088817277982 }, { "epoch": 11.94, "grad_norm": 0.15841124656299638, "learning_rate": 5.941282253823019e-07, "loss": 0.0124, "sft_loss": 0.0017132775596110151, "step": 2000, "total_loss": 0.0018226395098338345, "value_loss": 0.0010936193576071673, "value_loss_search": 0.004504057419785568, "value_loss_thought": 0.004244897459489039 }, { "epoch": 11.97, "grad_norm": 0.19080328469455396, "learning_rate": 5.828994556224333e-07, "loss": 0.0133, "sft_loss": 0.0016100038104923443, "step": 2005, "total_loss": 0.001695921379121046, "value_loss": 0.0008591757061367389, "value_loss_search": 0.0032462079869560513, "value_loss_thought": 0.0036271976513944535 }, { "epoch": 12.0, "grad_norm": 0.42540669262730385, "learning_rate": 5.717637913039895e-07, "loss": 0.0142, "sft_loss": 0.0014130673225736246, "step": 2010, "total_loss": 0.001474345298233004, "value_loss": 0.0006127797149346748, "value_loss_search": 0.0022098295013165624, "value_loss_thought": 0.002692408211157726 }, { "epoch": 12.03, "grad_norm": 0.13234597224013306, "learning_rate": 5.607217732389503e-07, "loss": 0.0134, "sft_loss": 0.00153753467311617, "step": 2015, "total_loss": 0.0015983398959235728, "value_loss": 0.0006080522839738478, "value_loss_search": 0.002334933211577095, "value_loss_thought": 0.002529485058130376 }, { "epoch": 12.06, "grad_norm": 0.137160131767248, "learning_rate": 5.497739376912956e-07, "loss": 0.0109, "sft_loss": 0.0011861874081660062, "step": 2020, "total_loss": 0.0012396710289571899, "value_loss": 0.0005348361521200218, "value_loss_search": 0.0019204338413487676, "value_loss_thought": 0.002358255369199469 }, { "epoch": 12.09, "grad_norm": 0.12635424611743207, "learning_rate": 5.389208163509585e-07, "loss": 0.0132, "sft_loss": 0.0018190979899372905, "step": 2025, "total_loss": 0.001879164192882854, "value_loss": 0.0006006620240896155, "value_loss_search": 0.0024004459903380847, "value_loss_thought": 0.002404850223280164 }, { "epoch": 12.11, "grad_norm": 0.18160270722099203, "learning_rate": 5.281629363080054e-07, "loss": 0.0122, "sft_loss": 0.0015674298861995339, "step": 2030, "total_loss": 0.001623824713238875, "value_loss": 0.0005639481589923889, "value_loss_search": 0.002164925300516529, "value_loss_thought": 0.002346659959630415 }, { "epoch": 12.14, "grad_norm": 0.1417342739770937, "learning_rate": 5.175008200270368e-07, "loss": 0.0125, "sft_loss": 0.0014136525540379807, "step": 2035, "total_loss": 0.0014688616164136193, "value_loss": 0.0005520905897583361, "value_loss_search": 0.0017787457187345979, "value_loss_thought": 0.0026379790163218787 }, { "epoch": 12.17, "grad_norm": 0.1290358512992214, "learning_rate": 5.06934985321813e-07, "loss": 0.0113, "sft_loss": 0.0016105213377159088, "step": 2040, "total_loss": 0.0016679565724615486, "value_loss": 0.0005743524450053883, "value_loss_search": 0.0021123728056977598, "value_loss_thought": 0.0024824467762755375 }, { "epoch": 12.2, "grad_norm": 0.17303760430312853, "learning_rate": 4.964659453301088e-07, "loss": 0.0125, "sft_loss": 0.0017713219043798746, "step": 2045, "total_loss": 0.001820996348112658, "value_loss": 0.0004967445772763313, "value_loss_search": 0.0021500766970802944, "value_loss_thought": 0.0018238799311802723 }, { "epoch": 12.23, "grad_norm": 0.13656860205410482, "learning_rate": 4.860942084887868e-07, "loss": 0.012, "sft_loss": 0.0011710747960023582, "step": 2050, "total_loss": 0.0012223293429954652, "value_loss": 0.0005125455333654827, "value_loss_search": 0.0025454429445233018, "value_loss_thought": 0.0015549213211500045 }, { "epoch": 12.26, "grad_norm": 0.17146623033662603, "learning_rate": 4.758202785091118e-07, "loss": 0.0122, "sft_loss": 0.0015416829177411274, "step": 2055, "total_loss": 0.0015948826805882276, "value_loss": 0.0005319975216764306, "value_loss_search": 0.0020023466028078474, "value_loss_thought": 0.002253633565032942 }, { "epoch": 12.29, "grad_norm": 0.13131858410389194, "learning_rate": 4.656446543522822e-07, "loss": 0.0114, "sft_loss": 0.0015963483776431531, "step": 2060, "total_loss": 0.0016545026784314132, "value_loss": 0.000581543093721848, "value_loss_search": 0.002123642799631398, "value_loss_thought": 0.00252870192825867 }, { "epoch": 12.32, "grad_norm": 0.1387225576415455, "learning_rate": 4.555678302051988e-07, "loss": 0.0125, "sft_loss": 0.0011560205864952877, "step": 2065, "total_loss": 0.0012139756489808918, "value_loss": 0.0005795506685103647, "value_loss_search": 0.0022142785405321773, "value_loss_thought": 0.0024221268018152385 }, { "epoch": 12.35, "grad_norm": 0.13838555141205885, "learning_rate": 4.4559029545646835e-07, "loss": 0.0128, "sft_loss": 0.0019845320377498863, "step": 2070, "total_loss": 0.002043302868487018, "value_loss": 0.0005877082873098516, "value_loss_search": 0.001886277649805379, "value_loss_thought": 0.0028153886438985866 }, { "epoch": 12.38, "grad_norm": 0.1372443532006686, "learning_rate": 4.357125346726293e-07, "loss": 0.013, "sft_loss": 0.001509127317694947, "step": 2075, "total_loss": 0.0015627349823276403, "value_loss": 0.0005360765391628775, "value_loss_search": 0.0020487196703186327, "value_loss_thought": 0.0022398926420464705 }, { "epoch": 12.41, "grad_norm": 0.13844786045080867, "learning_rate": 4.2593502757462326e-07, "loss": 0.0122, "sft_loss": 0.0017603133688680827, "step": 2080, "total_loss": 0.0018168458402463728, "value_loss": 0.0005653246701513126, "value_loss_search": 0.0017796060631980026, "value_loss_thought": 0.0027429913518062675 }, { "epoch": 12.44, "grad_norm": 0.13268164718583428, "learning_rate": 4.162582490144948e-07, "loss": 0.0134, "sft_loss": 0.001744911610148847, "step": 2085, "total_loss": 0.001791581775782447, "value_loss": 0.0004667015931431706, "value_loss_search": 0.002076370053646315, "value_loss_thought": 0.0016572426828588504 }, { "epoch": 12.47, "grad_norm": 0.1639448919467817, "learning_rate": 4.066826689523329e-07, "loss": 0.0124, "sft_loss": 0.0014559761038981378, "step": 2090, "total_loss": 0.0015073618816018098, "value_loss": 0.0005138578154856077, "value_loss_search": 0.0021935081386459387, "value_loss_thought": 0.0019173543787928792 }, { "epoch": 12.5, "grad_norm": 0.12404791787908588, "learning_rate": 3.972087524334417e-07, "loss": 0.0129, "sft_loss": 0.0014528931671520696, "step": 2095, "total_loss": 0.0015010984525360982, "value_loss": 0.00048205279412059097, "value_loss_search": 0.0017939364208864107, "value_loss_thought": 0.002062485937221936 }, { "epoch": 12.53, "grad_norm": 0.11913134655746999, "learning_rate": 3.8783695956576104e-07, "loss": 0.0124, "sft_loss": 0.0014827497507212684, "step": 2100, "total_loss": 0.001538775590775998, "value_loss": 0.0005602583627023705, "value_loss_search": 0.0021692122728495635, "value_loss_thought": 0.0023128546302928045 }, { "epoch": 12.56, "grad_norm": 0.2114888612576018, "learning_rate": 3.785677454975162e-07, "loss": 0.0129, "sft_loss": 0.0016362351918360219, "step": 2105, "total_loss": 0.0016834996304623928, "value_loss": 0.00047264444830261707, "value_loss_search": 0.0016264815088106843, "value_loss_thought": 0.002154674075870844 }, { "epoch": 12.59, "grad_norm": 0.15216955248234498, "learning_rate": 3.6940156039511536e-07, "loss": 0.0131, "sft_loss": 0.0011432622733991594, "step": 2110, "total_loss": 0.001217301837471041, "value_loss": 0.0007403955911058802, "value_loss_search": 0.003449674524733837, "value_loss_thought": 0.00247349016779026 }, { "epoch": 12.62, "grad_norm": 0.16263392439324273, "learning_rate": 3.603388494212892e-07, "loss": 0.013, "sft_loss": 0.0014238910138374195, "step": 2115, "total_loss": 0.001483250133594538, "value_loss": 0.0005935910872267413, "value_loss_search": 0.0018694708225950763, "value_loss_thought": 0.0028792579010541884 }, { "epoch": 12.65, "grad_norm": 0.1366455163894168, "learning_rate": 3.5138005271346643e-07, "loss": 0.0125, "sft_loss": 0.0016499412042321638, "step": 2120, "total_loss": 0.001711774785312059, "value_loss": 0.0006183358492023672, "value_loss_search": 0.002253155542811669, "value_loss_thought": 0.002693531273280314 }, { "epoch": 12.68, "grad_norm": 0.1723979401773517, "learning_rate": 3.425256053624013e-07, "loss": 0.0136, "sft_loss": 0.0012428588001057506, "step": 2125, "total_loss": 0.0012996991815896308, "value_loss": 0.0005684036681486759, "value_loss_search": 0.002460998028504946, "value_loss_thought": 0.002086231320936349 }, { "epoch": 12.71, "grad_norm": 0.13838742752770622, "learning_rate": 3.3377593739104207e-07, "loss": 0.0124, "sft_loss": 0.001705869528814219, "step": 2130, "total_loss": 0.0017658297125933587, "value_loss": 0.0005996018458176878, "value_loss_search": 0.0025955964968829904, "value_loss_thought": 0.002201218291384066 }, { "epoch": 12.74, "grad_norm": 0.11587188180305923, "learning_rate": 3.2513147373364864e-07, "loss": 0.0119, "sft_loss": 0.001407682741410099, "step": 2135, "total_loss": 0.0014551524436541285, "value_loss": 0.0004746971244514953, "value_loss_search": 0.0019987275512676206, "value_loss_thought": 0.0017988494690371227 }, { "epoch": 12.77, "grad_norm": 0.14689001469256818, "learning_rate": 3.165926342151518e-07, "loss": 0.0123, "sft_loss": 0.0012021351605653763, "step": 2140, "total_loss": 0.0012947583980004395, "value_loss": 0.0009262323781513259, "value_loss_search": 0.002333370926893963, "value_loss_thought": 0.005076488072023722 }, { "epoch": 12.8, "grad_norm": 0.19822181831478017, "learning_rate": 3.0815983353076647e-07, "loss": 0.0128, "sft_loss": 0.0016310029430314898, "step": 2145, "total_loss": 0.0016860840906559814, "value_loss": 0.0005508116572968902, "value_loss_search": 0.0021066489629220086, "value_loss_thought": 0.0022998442878133575 }, { "epoch": 12.83, "grad_norm": 0.16816840054773924, "learning_rate": 2.998334812258524e-07, "loss": 0.0108, "sft_loss": 0.0014653883612481878, "step": 2150, "total_loss": 0.0015446662450301573, "value_loss": 0.0007927789123186813, "value_loss_search": 0.0023768133905377907, "value_loss_thought": 0.00396541793236338 }, { "epoch": 12.86, "grad_norm": 0.16375843499526804, "learning_rate": 2.9161398167602053e-07, "loss": 0.0126, "sft_loss": 0.001740490208612755, "step": 2155, "total_loss": 0.0018277797405071184, "value_loss": 0.0008728954095772679, "value_loss_search": 0.0033813179442745422, "value_loss_thought": 0.003601845331900222 }, { "epoch": 12.89, "grad_norm": 0.24414748866003813, "learning_rate": 2.8350173406749975e-07, "loss": 0.0131, "sft_loss": 0.0017086814332287759, "step": 2160, "total_loss": 0.001792904332711487, "value_loss": 0.0008422289131431171, "value_loss_search": 0.002163193000239971, "value_loss_thought": 0.0045746383284949845 }, { "epoch": 12.92, "grad_norm": 0.14738499094459379, "learning_rate": 2.75497132377745e-07, "loss": 0.0126, "sft_loss": 0.0015413039014674722, "step": 2165, "total_loss": 0.0015857949202086274, "value_loss": 0.00044491018940107097, "value_loss_search": 0.0015283264680988396, "value_loss_thought": 0.0020309550552781276 }, { "epoch": 12.95, "grad_norm": 0.1314803478632567, "learning_rate": 2.676005653563063e-07, "loss": 0.0122, "sft_loss": 0.0012932573270518332, "step": 2170, "total_loss": 0.0013801320299876353, "value_loss": 0.0008687470835752719, "value_loss_search": 0.0031730859160461478, "value_loss_thought": 0.0037768907095369285 }, { "epoch": 12.98, "grad_norm": 0.15101872666959495, "learning_rate": 2.5981241650594736e-07, "loss": 0.0116, "sft_loss": 0.001474944083020091, "step": 2175, "total_loss": 0.0015574592757964466, "value_loss": 0.0008251519188775091, "value_loss_search": 0.0024292909915487825, "value_loss_thought": 0.00417192430927571 }, { "epoch": 13.01, "grad_norm": 0.12826566707799528, "learning_rate": 2.5213306406402263e-07, "loss": 0.0133, "sft_loss": 0.0017561075917910784, "step": 2180, "total_loss": 0.0018071690113764306, "value_loss": 0.0005106141129658682, "value_loss_search": 0.0019689877941118537, "value_loss_thought": 0.0021159251197332197 }, { "epoch": 13.04, "grad_norm": 0.1709754478860301, "learning_rate": 2.445628809841055e-07, "loss": 0.0114, "sft_loss": 0.0015144431439694018, "step": 2185, "total_loss": 0.001573674503174516, "value_loss": 0.0005923136392084416, "value_loss_search": 0.001896365256311583, "value_loss_thought": 0.0028421438521490927 }, { "epoch": 13.07, "grad_norm": 0.10695694103625902, "learning_rate": 2.3710223491787643e-07, "loss": 0.0115, "sft_loss": 0.0011580413149204106, "step": 2190, "total_loss": 0.0012181214089466152, "value_loss": 0.0006008008974731638, "value_loss_search": 0.0022067354780915594, "value_loss_thought": 0.0025996716371764705 }, { "epoch": 13.1, "grad_norm": 0.1223481366934227, "learning_rate": 2.2975148819726844e-07, "loss": 0.0123, "sft_loss": 0.0013828211929649114, "step": 2195, "total_loss": 0.001459511714705286, "value_loss": 0.0007669052273740817, "value_loss_search": 0.0029846215529801155, "value_loss_thought": 0.0031506202247840063 }, { "epoch": 13.13, "grad_norm": 0.12035969601588177, "learning_rate": 2.2251099781686853e-07, "loss": 0.0118, "sft_loss": 0.0010231956985080615, "step": 2200, "total_loss": 0.0010796000485257195, "value_loss": 0.0005640435443638126, "value_loss_search": 0.0022480406605694726, "value_loss_thought": 0.0022643077172460834 }, { "epoch": 13.16, "grad_norm": 0.173715525744218, "learning_rate": 2.1538111541658246e-07, "loss": 0.0115, "sft_loss": 0.0012041608191793785, "step": 2205, "total_loss": 0.0012736963247903077, "value_loss": 0.0006953551075753239, "value_loss_search": 0.0023029572450525395, "value_loss_thought": 0.0032598836100532933 }, { "epoch": 13.19, "grad_norm": 0.12560183999239857, "learning_rate": 2.0836218726455416e-07, "loss": 0.0116, "sft_loss": 0.001454232243122533, "step": 2210, "total_loss": 0.0015192492540819559, "value_loss": 0.0006501699334421574, "value_loss_search": 0.0016968745266581209, "value_loss_thought": 0.0035044849046244053 }, { "epoch": 13.22, "grad_norm": 0.15088975343987443, "learning_rate": 2.0145455424035065e-07, "loss": 0.0123, "sft_loss": 0.0014402579807210713, "step": 2215, "total_loss": 0.001486383965337268, "value_loss": 0.00046125984255240837, "value_loss_search": 0.0020269616570260498, "value_loss_thought": 0.001663117084353871 }, { "epoch": 13.25, "grad_norm": 0.1168644695587691, "learning_rate": 1.9465855181840742e-07, "loss": 0.0123, "sft_loss": 0.0016895142354769633, "step": 2220, "total_loss": 0.0017325070250297614, "value_loss": 0.0004299280713553344, "value_loss_search": 0.0017563865643637655, "value_loss_thought": 0.0016830380049214 }, { "epoch": 13.28, "grad_norm": 0.12969058359006697, "learning_rate": 1.8797451005173384e-07, "loss": 0.0126, "sft_loss": 0.0016237141971942037, "step": 2225, "total_loss": 0.0016805375176829785, "value_loss": 0.000568233198055168, "value_loss_search": 0.0019997224272742644, "value_loss_thought": 0.002546143160088832 }, { "epoch": 13.31, "grad_norm": 0.1464573687686116, "learning_rate": 1.8140275355588682e-07, "loss": 0.0119, "sft_loss": 0.0013673121546162292, "step": 2230, "total_loss": 0.0014293207376852024, "value_loss": 0.0006200857808948967, "value_loss_search": 0.0028769256222062724, "value_loss_thought": 0.0020837606152554144 }, { "epoch": 13.34, "grad_norm": 0.13416087510039532, "learning_rate": 1.749436014932021e-07, "loss": 0.0131, "sft_loss": 0.001349821488838643, "step": 2235, "total_loss": 0.001453561357747901, "value_loss": 0.0010373986635158871, "value_loss_search": 0.003940903465206702, "value_loss_thought": 0.004358286027468239 }, { "epoch": 13.37, "grad_norm": 0.12129590354158527, "learning_rate": 1.68597367557298e-07, "loss": 0.0122, "sft_loss": 0.001293862346210517, "step": 2240, "total_loss": 0.0013660816371590557, "value_loss": 0.000722192872768801, "value_loss_search": 0.002515532513518792, "value_loss_thought": 0.0032620103815133917 }, { "epoch": 13.4, "grad_norm": 0.13830951441212483, "learning_rate": 1.6236435995783644e-07, "loss": 0.0122, "sft_loss": 0.0014756130083696916, "step": 2245, "total_loss": 0.0015344153451877674, "value_loss": 0.000588023400473503, "value_loss_search": 0.0021232158583757155, "value_loss_thought": 0.0025809713747094063 }, { "epoch": 13.43, "grad_norm": 0.14177549511437526, "learning_rate": 1.5624488140555673e-07, "loss": 0.0122, "sft_loss": 0.001884979850728996, "step": 2250, "total_loss": 0.0019392464295002298, "value_loss": 0.0005426657894744835, "value_loss_search": 0.002217422648698175, "value_loss_thought": 0.0021239036757833675 }, { "epoch": 13.46, "grad_norm": 0.1357612842710405, "learning_rate": 1.5023922909757543e-07, "loss": 0.0116, "sft_loss": 0.001019277679733932, "step": 2255, "total_loss": 0.0010746703279515657, "value_loss": 0.000553926424036888, "value_loss_search": 0.0021177719290335515, "value_loss_thought": 0.002313639441763371 }, { "epoch": 13.49, "grad_norm": 0.1299080051431472, "learning_rate": 1.44347694702949e-07, "loss": 0.0122, "sft_loss": 0.0013602351624285801, "step": 2260, "total_loss": 0.001403163997850676, "value_loss": 0.0004292882472554993, "value_loss_search": 0.0016838242217204424, "value_loss_thought": 0.0017504817547660423 }, { "epoch": 13.52, "grad_norm": 0.1286701282026923, "learning_rate": 1.3857056434851301e-07, "loss": 0.0116, "sft_loss": 0.0010856040549697354, "step": 2265, "total_loss": 0.001160654007901485, "value_loss": 0.0007504995341719223, "value_loss_search": 0.002396246173121419, "value_loss_thought": 0.003607750001219756 }, { "epoch": 13.55, "grad_norm": 0.14396728510258697, "learning_rate": 1.3290811860498242e-07, "loss": 0.0108, "sft_loss": 0.0011329900531563907, "step": 2270, "total_loss": 0.001192187089957031, "value_loss": 0.0005919702982509989, "value_loss_search": 0.001987281997116952, "value_loss_thought": 0.0027484804012374298 }, { "epoch": 13.58, "grad_norm": 0.11133502723535868, "learning_rate": 1.273606324733284e-07, "loss": 0.0119, "sft_loss": 0.0012918035121401773, "step": 2275, "total_loss": 0.0013533333825648697, "value_loss": 0.0006152988007769977, "value_loss_search": 0.0025145169366169286, "value_loss_thought": 0.002407873464107979 }, { "epoch": 13.61, "grad_norm": 0.12100244913687518, "learning_rate": 1.2192837537142065e-07, "loss": 0.0115, "sft_loss": 0.0015881910978350789, "step": 2280, "total_loss": 0.0016699408407248484, "value_loss": 0.0008174974953490732, "value_loss_search": 0.0026218705085824467, "value_loss_thought": 0.003918109554024341 }, { "epoch": 13.64, "grad_norm": 0.1389992640143122, "learning_rate": 1.1661161112094421e-07, "loss": 0.0116, "sft_loss": 0.0014317982335342095, "step": 2285, "total_loss": 0.0014963600385272003, "value_loss": 0.0006456180733948713, "value_loss_search": 0.0020672334404252977, "value_loss_thought": 0.0030977111006450287 }, { "epoch": 13.67, "grad_norm": 0.1376806904958747, "learning_rate": 1.1141059793458586e-07, "loss": 0.0135, "sft_loss": 0.001531593399704434, "step": 2290, "total_loss": 0.0015879040782849074, "value_loss": 0.0005631069248011045, "value_loss_search": 0.002264327982754821, "value_loss_thought": 0.002240527437061246 }, { "epoch": 13.7, "grad_norm": 0.15507672061398864, "learning_rate": 1.0632558840349333e-07, "loss": 0.0127, "sft_loss": 0.001709194021532312, "step": 2295, "total_loss": 0.0017596675465483714, "value_loss": 0.0005047351388384414, "value_loss_search": 0.0021137851630555816, "value_loss_thought": 0.0019240959423768799 }, { "epoch": 13.73, "grad_norm": 0.13950654690195688, "learning_rate": 1.0135682948501146e-07, "loss": 0.011, "sft_loss": 0.0016408312105340884, "step": 2300, "total_loss": 0.0016937724493836016, "value_loss": 0.0005294122824125225, "value_loss_search": 0.002247820197192141, "value_loss_thought": 0.0019874780593738704 }, { "epoch": 13.76, "grad_norm": 0.14603248892583517, "learning_rate": 9.650456249068268e-08, "loss": 0.0125, "sft_loss": 0.001476616770378314, "step": 2305, "total_loss": 0.001532993128402893, "value_loss": 0.0005637636299979931, "value_loss_search": 0.0019370973135977465, "value_loss_thought": 0.0025730117005196007 }, { "epoch": 13.79, "grad_norm": 0.12093820892491877, "learning_rate": 9.176902307453328e-08, "loss": 0.0123, "sft_loss": 0.0016192490846151486, "step": 2310, "total_loss": 0.0016830127960190567, "value_loss": 0.0006376370715429402, "value_loss_search": 0.0025056022790977293, "value_loss_thought": 0.0025954942909493183 }, { "epoch": 13.82, "grad_norm": 0.13585785284804588, "learning_rate": 8.715044122162508e-08, "loss": 0.0127, "sft_loss": 0.0017002749460516497, "step": 2315, "total_loss": 0.0017550808576103805, "value_loss": 0.0005480592571984744, "value_loss_search": 0.0021303763422451994, "value_loss_thought": 0.002254097702507352 }, { "epoch": 13.85, "grad_norm": 0.14486121709588692, "learning_rate": 8.264904123688745e-08, "loss": 0.0119, "sft_loss": 0.001309369836235419, "step": 2320, "total_loss": 0.0013705807045170104, "value_loss": 0.0006121086411894794, "value_loss_search": 0.0019892391615371706, "value_loss_thought": 0.002907629985912763 }, { "epoch": 13.88, "grad_norm": 0.13756062164141314, "learning_rate": 7.826504173422372e-08, "loss": 0.0131, "sft_loss": 0.0015762085182359441, "step": 2325, "total_loss": 0.0016776628372952018, "value_loss": 0.0010145431685145922, "value_loss_search": 0.0035052792621627304, "value_loss_thought": 0.004611065951957016 }, { "epoch": 13.91, "grad_norm": 0.12390462036780822, "learning_rate": 7.399865562589315e-08, "loss": 0.0131, "sft_loss": 0.0017432003776775673, "step": 2330, "total_loss": 0.0018063452240141941, "value_loss": 0.0006314483902201574, "value_loss_search": 0.001986952345464488, "value_loss_thought": 0.003064634781657105 }, { "epoch": 13.94, "grad_norm": 0.1357904093704358, "learning_rate": 6.985009011217209e-08, "loss": 0.0129, "sft_loss": 0.001526657902286388, "step": 2335, "total_loss": 0.0015822424648092692, "value_loss": 0.0005558455008667807, "value_loss_search": 0.0019112108780859672, "value_loss_thought": 0.0025355531076371564 }, { "epoch": 13.96, "grad_norm": 0.12173848490891062, "learning_rate": 6.581954667128965e-08, "loss": 0.0125, "sft_loss": 0.0019005106441909447, "step": 2340, "total_loss": 0.0019535693316782956, "value_loss": 0.0005305868885898235, "value_loss_search": 0.0019408738701571337, "value_loss_thought": 0.0023038212573055715 }, { "epoch": 13.99, "grad_norm": 0.12311131791939463, "learning_rate": 6.190722104964436e-08, "loss": 0.0132, "sft_loss": 0.0012315514002693817, "step": 2345, "total_loss": 0.0012908000820957, "value_loss": 0.0005924867479691898, "value_loss_search": 0.002214185446916872, "value_loss_thought": 0.0025257085178168382 }, { "epoch": 14.02, "grad_norm": 0.11289506914779934, "learning_rate": 5.811330325229569e-08, "loss": 0.012, "sft_loss": 0.0014274256362114101, "step": 2350, "total_loss": 0.00148404497326311, "value_loss": 0.0005661933894430149, "value_loss_search": 0.0018998756612177204, "value_loss_thought": 0.0026296714639101994 }, { "epoch": 14.05, "grad_norm": 0.14032918438561062, "learning_rate": 5.443797753373864e-08, "loss": 0.012, "sft_loss": 0.001353855719207786, "step": 2355, "total_loss": 0.0014032512601801273, "value_loss": 0.0004939554979955574, "value_loss_search": 0.0016622686555592737, "value_loss_thought": 0.0022893753313837804 }, { "epoch": 14.08, "grad_norm": 0.1302537325621518, "learning_rate": 5.0881422388952275e-08, "loss": 0.012, "sft_loss": 0.0009611410961952061, "step": 2360, "total_loss": 0.001024993611773084, "value_loss": 0.0006385252608311021, "value_loss_search": 0.0032244599921341433, "value_loss_thought": 0.0018837421045418523 }, { "epoch": 14.11, "grad_norm": 0.12164242099090507, "learning_rate": 4.7443810544734456e-08, "loss": 0.0116, "sft_loss": 0.0013725335855269804, "step": 2365, "total_loss": 0.0014293210130739808, "value_loss": 0.0005678743107637274, "value_loss_search": 0.002384104471229875, "value_loss_thought": 0.00215889002156473 }, { "epoch": 14.14, "grad_norm": 0.163492619946634, "learning_rate": 4.412530895131051e-08, "loss": 0.0115, "sft_loss": 0.0012715687771560624, "step": 2370, "total_loss": 0.001382276511756686, "value_loss": 0.0011070772634695913, "value_loss_search": 0.0019688921961233063, "value_loss_thought": 0.006887725907620279 }, { "epoch": 14.17, "grad_norm": 0.1481763234968163, "learning_rate": 4.092607877422578e-08, "loss": 0.0111, "sft_loss": 0.0013375403970712796, "step": 2375, "total_loss": 0.0014074473611998429, "value_loss": 0.0006990696618686343, "value_loss_search": 0.0029466108325550524, "value_loss_thought": 0.0026459465245352474 }, { "epoch": 14.2, "grad_norm": 0.1186453351073309, "learning_rate": 3.784627538652025e-08, "loss": 0.0112, "sft_loss": 0.0014332133869174868, "step": 2380, "total_loss": 0.0014889124539600117, "value_loss": 0.0005569907084790771, "value_loss_search": 0.002353395393970459, "value_loss_thought": 0.002102530315414697 }, { "epoch": 14.23, "grad_norm": 0.13825618980337176, "learning_rate": 3.488604836117987e-08, "loss": 0.0113, "sft_loss": 0.0014167566667310893, "step": 2385, "total_loss": 0.0014698782767844421, "value_loss": 0.0005312162420523237, "value_loss_search": 0.0019338703364951471, "value_loss_thought": 0.002315859655487884 }, { "epoch": 14.26, "grad_norm": 0.13774279281432664, "learning_rate": 3.204554146387456e-08, "loss": 0.0121, "sft_loss": 0.0012910763907711953, "step": 2390, "total_loss": 0.0013450498239109265, "value_loss": 0.0005397343376785102, "value_loss_search": 0.0018899123013738973, "value_loss_thought": 0.002427962389350569 }, { "epoch": 14.29, "grad_norm": 0.13075550190644245, "learning_rate": 2.9324892645975766e-08, "loss": 0.0125, "sft_loss": 0.0016997230239212513, "step": 2395, "total_loss": 0.0017409329679125563, "value_loss": 0.0004120994811614764, "value_loss_search": 0.0016292081562824024, "value_loss_thought": 0.0016675876831413917 }, { "epoch": 14.32, "grad_norm": 0.12955374374537487, "learning_rate": 2.67242340378554e-08, "loss": 0.013, "sft_loss": 0.0015193151630228385, "step": 2400, "total_loss": 0.0015746116821185653, "value_loss": 0.0005529651271899638, "value_loss_search": 0.0023058261356823095, "value_loss_thought": 0.0021178948942633725 }, { "epoch": 14.35, "grad_norm": 0.13785257512036658, "learning_rate": 2.4243691942471004e-08, "loss": 0.0129, "sft_loss": 0.0013197832508012653, "step": 2405, "total_loss": 0.001368108755303865, "value_loss": 0.00048325501416002226, "value_loss_search": 0.002034584193029332, "value_loss_thought": 0.0018314558808242508 }, { "epoch": 14.38, "grad_norm": 0.1248830545164438, "learning_rate": 2.1883386829229802e-08, "loss": 0.0117, "sft_loss": 0.0015428359998622908, "step": 2410, "total_loss": 0.0015903647321067638, "value_loss": 0.0004752873185424278, "value_loss_search": 0.0019664257517064245, "value_loss_thought": 0.0018358727832037403 }, { "epoch": 14.41, "grad_norm": 0.1284155399497688, "learning_rate": 1.9643433328139507e-08, "loss": 0.0129, "sft_loss": 0.0015311524126445874, "step": 2415, "total_loss": 0.0015765634343665625, "value_loss": 0.0004541101951872406, "value_loss_search": 0.0018012137584236144, "value_loss_thought": 0.0018316678463634161 }, { "epoch": 14.44, "grad_norm": 0.13722313083098495, "learning_rate": 1.7523940224239422e-08, "loss": 0.0129, "sft_loss": 0.0018140839121770114, "step": 2420, "total_loss": 0.001872726490603327, "value_loss": 0.0005864257679718321, "value_loss_search": 0.0018046398709088863, "value_loss_thought": 0.002886766302572141 }, { "epoch": 14.47, "grad_norm": 0.13659337122897194, "learning_rate": 1.5525010452319966e-08, "loss": 0.0118, "sft_loss": 0.001406003290321678, "step": 2425, "total_loss": 0.0014668999268792505, "value_loss": 0.0006089662623253389, "value_loss_search": 0.0023407790422652398, "value_loss_thought": 0.0025309510473562114 }, { "epoch": 14.5, "grad_norm": 0.1630202519081285, "learning_rate": 1.3646741091920546e-08, "loss": 0.012, "sft_loss": 0.0018367367767496035, "step": 2430, "total_loss": 0.0018952648396265205, "value_loss": 0.0005852805467498001, "value_loss_search": 0.0021444437501230594, "value_loss_thought": 0.002537800629784215 }, { "epoch": 14.53, "grad_norm": 0.14649930933179126, "learning_rate": 1.1889223362616664e-08, "loss": 0.0137, "sft_loss": 0.0014180621423292906, "step": 2435, "total_loss": 0.0014755390043148964, "value_loss": 0.0005747684329776348, "value_loss_search": 0.001847173216401643, "value_loss_thought": 0.0027509742275924507 }, { "epoch": 14.56, "grad_norm": 0.12468828325929393, "learning_rate": 1.0252542619589856e-08, "loss": 0.0123, "sft_loss": 0.0015723185992101208, "step": 2440, "total_loss": 0.0016369965853844093, "value_loss": 0.0006467797803679787, "value_loss_search": 0.0025067227985736905, "value_loss_thought": 0.0026675153821770436 }, { "epoch": 14.59, "grad_norm": 0.14711025562960323, "learning_rate": 8.736778349480723e-09, "loss": 0.0119, "sft_loss": 0.0015009303140686824, "step": 2445, "total_loss": 0.0015552314092317943, "value_loss": 0.0005430109402368543, "value_loss_search": 0.0018667381095156088, "value_loss_thought": 0.0024773493929330925 }, { "epoch": 14.62, "grad_norm": 0.1173625792384152, "learning_rate": 7.3420041665303585e-09, "loss": 0.0118, "sft_loss": 0.0015992191329132766, "step": 2450, "total_loss": 0.0016605654201441666, "value_loss": 0.0006134628767767936, "value_loss_search": 0.002318676908919315, "value_loss_thought": 0.00258902612285965 }, { "epoch": 14.65, "grad_norm": 0.13451903100649834, "learning_rate": 6.068287809004314e-09, "loss": 0.0121, "sft_loss": 0.0015388225147034973, "step": 2455, "total_loss": 0.0015945610022775724, "value_loss": 0.0005573847084178851, "value_loss_search": 0.0020913115042260344, "value_loss_thought": 0.002367766158249651 }, { "epoch": 14.68, "grad_norm": 0.1166996348736506, "learning_rate": 4.915691135903566e-09, "loss": 0.0115, "sft_loss": 0.001615592051530257, "step": 2460, "total_loss": 0.0016700670589443688, "value_loss": 0.0005447498988303323, "value_loss_search": 0.0028115076490621504, "value_loss_thought": 0.0015464915640791333 }, { "epoch": 14.71, "grad_norm": 0.1505633716850507, "learning_rate": 3.884270123959144e-09, "loss": 0.0114, "sft_loss": 0.0012577687215525658, "step": 2465, "total_loss": 0.0013172450172078242, "value_loss": 0.0005947628893864021, "value_loss_search": 0.002692494276323032, "value_loss_thought": 0.0020656088718169485 }, { "epoch": 14.74, "grad_norm": 0.1334895803298696, "learning_rate": 2.9740748649145778e-09, "loss": 0.0121, "sft_loss": 0.0015028521651402117, "step": 2470, "total_loss": 0.0015599074833644977, "value_loss": 0.0005705531071839686, "value_loss_search": 0.0018976363004298946, "value_loss_thought": 0.0026667885652045696 }, { "epoch": 14.77, "grad_norm": 0.11676293297773671, "learning_rate": 2.1851495630928475e-09, "loss": 0.0121, "sft_loss": 0.0011438517773058265, "step": 2475, "total_loss": 0.001199561754755507, "value_loss": 0.000557099866716726, "value_loss_search": 0.002204984583852365, "value_loss_thought": 0.002251814359776905 }, { "epoch": 14.8, "grad_norm": 0.12135462746119888, "learning_rate": 1.5175325332489331e-09, "loss": 0.0118, "sft_loss": 0.0013419981114566326, "step": 2480, "total_loss": 0.0014050020730792313, "value_loss": 0.0006300395589619257, "value_loss_search": 0.002390774656430494, "value_loss_thought": 0.00264954178167045 }, { "epoch": 14.83, "grad_norm": 0.12877818502933935, "learning_rate": 9.712561987104685e-10, "loss": 0.012, "sft_loss": 0.001536320144077763, "step": 2485, "total_loss": 0.0015917829690181406, "value_loss": 0.0005546283648982353, "value_loss_search": 0.0021470139769645515, "value_loss_thought": 0.0022900129014942648 }, { "epoch": 14.86, "grad_norm": 0.12977801368527653, "learning_rate": 5.463470898017798e-10, "loss": 0.0128, "sft_loss": 0.00153597031312529, "step": 2490, "total_loss": 0.0015952805159344051, "value_loss": 0.0005931021074616182, "value_loss_search": 0.002620013221735462, "value_loss_thought": 0.002124803609603987 }, { "epoch": 14.89, "grad_norm": 0.10846141171279486, "learning_rate": 2.4282584255547194e-10, "loss": 0.0121, "sft_loss": 0.0014916551124770194, "step": 2495, "total_loss": 0.00154097568412368, "value_loss": 0.0004932056985808231, "value_loss_search": 0.001677905346400621, "value_loss_thought": 0.0022677402528643144 }, { "epoch": 14.92, "grad_norm": 0.1265986974514275, "learning_rate": 6.070719771156252e-11, "loss": 0.0123, "sft_loss": 0.0017506703617982567, "step": 2500, "total_loss": 0.0018172925318538091, "value_loss": 0.0006662217223492916, "value_loss_search": 0.00269593252516529, "value_loss_thought": 0.0026338411991446266 }, { "epoch": 14.95, "grad_norm": 0.12729059230038972, "learning_rate": 0.0, "loss": 0.0116, "sft_loss": 0.0012781478551914915, "step": 2505, "total_loss": 0.0013354677166319106, "value_loss": 0.0005731985727152278, "value_loss_search": 0.0018533691099037243, "value_loss_thought": 0.0027322194214775665 }, { "epoch": 14.95, "step": 2505, "total_flos": 0.0, "train_loss": 0.09394951220936404, "train_runtime": 92418.8162, "train_samples_per_second": 3.481, "train_steps_per_second": 0.027 } ], "logging_steps": 5, "max_steps": 2505, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 350, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }