{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.94964565460649,
"eval_steps": 500,
"global_step": 2505,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"grad_norm": 29.208302611647333,
"learning_rate": 9.960159362549802e-08,
"loss": 1.1515,
"sft_loss": 0.1292552625760436,
"step": 5,
"total_loss": 0.15341808083976502,
"value_loss": 0.24162817830219865,
"value_loss_search": 0.8858459698676597,
"value_loss_thought": 1.0471794582015719
},
{
"epoch": 0.06,
"grad_norm": 22.329741865045907,
"learning_rate": 1.9920318725099604e-07,
"loss": 1.2242,
"sft_loss": 0.13713806308805943,
"step": 10,
"total_loss": 0.16430261046643863,
"value_loss": 0.27164546787971633,
"value_loss_search": 1.0301193907580455,
"value_loss_thought": 1.1430443533812649
},
{
"epoch": 0.09,
"grad_norm": 16.69220166984679,
"learning_rate": 2.9880478087649405e-07,
"loss": 1.1712,
"sft_loss": 0.11755451802164316,
"step": 15,
"total_loss": 0.1385691657076677,
"value_loss": 0.21014647737029007,
"value_loss_search": 0.8529300302307092,
"value_loss_thought": 0.8282418000162579
},
{
"epoch": 0.12,
"grad_norm": 15.26729873693678,
"learning_rate": 3.9840637450199207e-07,
"loss": 1.047,
"sft_loss": 0.08705389499664307,
"step": 20,
"total_loss": 0.10808856324001681,
"value_loss": 0.21034667690400966,
"value_loss_search": 0.9239509059012562,
"value_loss_thought": 0.7588225028477609
},
{
"epoch": 0.15,
"grad_norm": 16.491728960581845,
"learning_rate": 4.9800796812749e-07,
"loss": 1.1149,
"sft_loss": 0.11774167586117983,
"step": 25,
"total_loss": 0.13845662841631565,
"value_loss": 0.20714952626731248,
"value_loss_search": 0.7495332275483975,
"value_loss_thought": 0.9076629818649963
},
{
"epoch": 0.18,
"grad_norm": 13.549454686271,
"learning_rate": 5.976095617529881e-07,
"loss": 0.883,
"sft_loss": 0.083274289034307,
"step": 30,
"total_loss": 0.10342498328827787,
"value_loss": 0.2015069484245032,
"value_loss_search": 0.6761670514111756,
"value_loss_thought": 0.9358885432491661
},
{
"epoch": 0.21,
"grad_norm": 13.443693495652768,
"learning_rate": 6.972111553784861e-07,
"loss": 0.805,
"sft_loss": 0.08413450215011835,
"step": 35,
"total_loss": 0.1025179280739394,
"value_loss": 0.18383425649954005,
"value_loss_search": 0.5737597634542908,
"value_loss_thought": 0.8969142883783207
},
{
"epoch": 0.24,
"grad_norm": 9.393174013923895,
"learning_rate": 7.968127490039841e-07,
"loss": 0.7925,
"sft_loss": 0.07780555076897144,
"step": 40,
"total_loss": 0.0958856519588153,
"value_loss": 0.18080101079540328,
"value_loss_search": 0.6263274765132223,
"value_loss_thought": 0.8200806069507962
},
{
"epoch": 0.27,
"grad_norm": 14.557899943787424,
"learning_rate": 8.964143426294822e-07,
"loss": 0.6601,
"sft_loss": 0.06730504501610994,
"step": 45,
"total_loss": 0.08439774930666318,
"value_loss": 0.17092704317183233,
"value_loss_search": 0.6156709093504105,
"value_loss_thought": 0.7517454390181229
},
{
"epoch": 0.3,
"grad_norm": 5.237593181257475,
"learning_rate": 9.9601593625498e-07,
"loss": 0.5741,
"sft_loss": 0.05471886033192277,
"step": 50,
"total_loss": 0.06898687301945756,
"value_loss": 0.14268012425163762,
"value_loss_search": 0.5047549274406264,
"value_loss_thought": 0.6366860627662391
},
{
"epoch": 0.33,
"grad_norm": 5.49922083273137,
"learning_rate": 1.0956175298804781e-06,
"loss": 0.5123,
"sft_loss": 0.05361118288710713,
"step": 55,
"total_loss": 0.0653495688289695,
"value_loss": 0.1173838603310287,
"value_loss_search": 0.3898644742756964,
"value_loss_thought": 0.5492064105579629
},
{
"epoch": 0.36,
"grad_norm": 4.288203361946918,
"learning_rate": 1.1952191235059762e-06,
"loss": 0.4635,
"sft_loss": 0.03561480939388275,
"step": 60,
"total_loss": 0.04738654125249013,
"value_loss": 0.11771731851040386,
"value_loss_search": 0.3933807262365008,
"value_loss_thought": 0.5483578289393336
},
{
"epoch": 0.39,
"grad_norm": 4.058680831426542,
"learning_rate": 1.294820717131474e-06,
"loss": 0.405,
"sft_loss": 0.03836059970781207,
"step": 65,
"total_loss": 0.049667860931367616,
"value_loss": 0.1130726064555347,
"value_loss_search": 0.39820473948711876,
"value_loss_thought": 0.5063761109966436
},
{
"epoch": 0.42,
"grad_norm": 6.869902478996511,
"learning_rate": 1.3944223107569721e-06,
"loss": 0.3733,
"sft_loss": 0.03362296093255281,
"step": 70,
"total_loss": 0.04714309505507117,
"value_loss": 0.13520133687416092,
"value_loss_search": 0.4016104494468891,
"value_loss_thought": 0.6800002471776679
},
{
"epoch": 0.45,
"grad_norm": 6.311680041987265,
"learning_rate": 1.4940239043824702e-06,
"loss": 0.3481,
"sft_loss": 0.028891393821686505,
"step": 75,
"total_loss": 0.04102085728627571,
"value_loss": 0.12129463239980395,
"value_loss_search": 0.4050044394622091,
"value_loss_thought": 0.5653526145266369
},
{
"epoch": 0.48,
"grad_norm": 2.336922175624455,
"learning_rate": 1.5936254980079683e-06,
"loss": 0.3233,
"sft_loss": 0.022906177304685117,
"step": 80,
"total_loss": 0.034932951488735854,
"value_loss": 0.12026773888501338,
"value_loss_search": 0.3894530791064881,
"value_loss_thought": 0.5726888305682223
},
{
"epoch": 0.51,
"grad_norm": 5.45038020134611,
"learning_rate": 1.6932270916334661e-06,
"loss": 0.3587,
"sft_loss": 0.028216248843818903,
"step": 85,
"total_loss": 0.04048567872773674,
"value_loss": 0.12269429753323494,
"value_loss_search": 0.40813677250607727,
"value_loss_thought": 0.5734176081576152
},
{
"epoch": 0.54,
"grad_norm": 4.373139438548536,
"learning_rate": 1.7928286852589644e-06,
"loss": 0.322,
"sft_loss": 0.030063434736803174,
"step": 90,
"total_loss": 0.0427513758506393,
"value_loss": 0.1268794086528942,
"value_loss_search": 0.45016070968977145,
"value_loss_thought": 0.5648745556129142
},
{
"epoch": 0.57,
"grad_norm": 3.3872903271458408,
"learning_rate": 1.8924302788844623e-06,
"loss": 0.3401,
"sft_loss": 0.03258526036515832,
"step": 95,
"total_loss": 0.04433769078787009,
"value_loss": 0.11752430766646285,
"value_loss_search": 0.3465800850848808,
"value_loss_thought": 0.5936143765953602
},
{
"epoch": 0.6,
"grad_norm": 2.899664564340515,
"learning_rate": 1.99203187250996e-06,
"loss": 0.3067,
"sft_loss": 0.023941753478720785,
"step": 100,
"total_loss": 0.03416529792812071,
"value_loss": 0.10223544192849658,
"value_loss_search": 0.3503992037207354,
"value_loss_thought": 0.46748433216125707
},
{
"epoch": 0.63,
"grad_norm": 5.3644737487705125,
"learning_rate": 2.0916334661354584e-06,
"loss": 0.3253,
"sft_loss": 0.03116959072649479,
"step": 105,
"total_loss": 0.04235388926063024,
"value_loss": 0.1118429817724973,
"value_loss_search": 0.3848635824284429,
"value_loss_thought": 0.509880276385229
},
{
"epoch": 0.66,
"grad_norm": 2.807659467395103,
"learning_rate": 2.1912350597609563e-06,
"loss": 0.3242,
"sft_loss": 0.030541227804496883,
"step": 110,
"total_loss": 0.03971213593467837,
"value_loss": 0.09170907911611721,
"value_loss_search": 0.2967309238272719,
"value_loss_thought": 0.4369417035020888
},
{
"epoch": 0.69,
"grad_norm": 5.565383438847989,
"learning_rate": 2.290836653386454e-06,
"loss": 0.2985,
"sft_loss": 0.03250002646818757,
"step": 115,
"total_loss": 0.04239036420258344,
"value_loss": 0.09890337522811024,
"value_loss_search": 0.3520674262268585,
"value_loss_thought": 0.43915957515127957
},
{
"epoch": 0.72,
"grad_norm": 6.59850948978845,
"learning_rate": 2.3904382470119524e-06,
"loss": 0.3364,
"sft_loss": 0.025409412384033204,
"step": 120,
"total_loss": 0.03802986370919825,
"value_loss": 0.12620451210823375,
"value_loss_search": 0.3680036140809534,
"value_loss_thought": 0.6416324874851853
},
{
"epoch": 0.75,
"grad_norm": 5.179904397966752,
"learning_rate": 2.4900398406374503e-06,
"loss": 0.3036,
"sft_loss": 0.029751096572726964,
"step": 125,
"total_loss": 0.04014970105636166,
"value_loss": 0.10398604299989529,
"value_loss_search": 0.3222789403582283,
"value_loss_thought": 0.5096093997417483
},
{
"epoch": 0.78,
"grad_norm": 2.7818925064532296,
"learning_rate": 2.589641434262948e-06,
"loss": 0.3135,
"sft_loss": 0.027432794403284787,
"step": 130,
"total_loss": 0.039190283310017546,
"value_loss": 0.1175748852387187,
"value_loss_search": 0.379848483775595,
"value_loss_thought": 0.5607506038024439
},
{
"epoch": 0.81,
"grad_norm": 3.1180919559174898,
"learning_rate": 2.6892430278884464e-06,
"loss": 0.2712,
"sft_loss": 0.02562392014078796,
"step": 135,
"total_loss": 0.037023369785310935,
"value_loss": 0.11399449376476696,
"value_loss_search": 0.3599409429902153,
"value_loss_thought": 0.552015009484603
},
{
"epoch": 0.84,
"grad_norm": 4.040800077838943,
"learning_rate": 2.7888446215139443e-06,
"loss": 0.3085,
"sft_loss": 0.030298156943172217,
"step": 140,
"total_loss": 0.040708614706454684,
"value_loss": 0.10410457447142107,
"value_loss_search": 0.3299579021160753,
"value_loss_thought": 0.5028786922775907
},
{
"epoch": 0.87,
"grad_norm": 3.7542993625502863,
"learning_rate": 2.8884462151394426e-06,
"loss": 0.2901,
"sft_loss": 0.033623593579977754,
"step": 145,
"total_loss": 0.04418132658774994,
"value_loss": 0.10557732760353247,
"value_loss_search": 0.3160576841353759,
"value_loss_thought": 0.528560933744302
},
{
"epoch": 0.9,
"grad_norm": 4.460937657390284,
"learning_rate": 2.9880478087649404e-06,
"loss": 0.2916,
"sft_loss": 0.027346841990947723,
"step": 150,
"total_loss": 0.03589563165332947,
"value_loss": 0.08548789632186526,
"value_loss_search": 0.2342234575400653,
"value_loss_thought": 0.4496797103092831
},
{
"epoch": 0.93,
"grad_norm": 2.4354922648897808,
"learning_rate": 3.0876494023904387e-06,
"loss": 0.2869,
"sft_loss": 0.0277699186000973,
"step": 155,
"total_loss": 0.03829326016602863,
"value_loss": 0.10523341278894804,
"value_loss_search": 0.30548287588171663,
"value_loss_thought": 0.536384426720906
},
{
"epoch": 0.95,
"grad_norm": 7.993819548921568,
"learning_rate": 3.1872509960159366e-06,
"loss": 0.2799,
"sft_loss": 0.028502677148208023,
"step": 160,
"total_loss": 0.03686514748242189,
"value_loss": 0.08362470217980444,
"value_loss_search": 0.242223716438275,
"value_loss_thought": 0.4267738984548487
},
{
"epoch": 0.98,
"grad_norm": 3.8893692041440224,
"learning_rate": 3.2868525896414344e-06,
"loss": 0.271,
"sft_loss": 0.03119087303057313,
"step": 165,
"total_loss": 0.04075200489824056,
"value_loss": 0.09561131574337196,
"value_loss_search": 0.26875160563213285,
"value_loss_thought": 0.4961389156705991
},
{
"epoch": 1.01,
"grad_norm": 3.77858117117453,
"learning_rate": 3.3864541832669323e-06,
"loss": 0.2499,
"sft_loss": 0.019535421626642346,
"step": 170,
"total_loss": 0.025970515375956894,
"value_loss": 0.06435093678010162,
"value_loss_search": 0.20992231852004578,
"value_loss_thought": 0.30488517887424677
},
{
"epoch": 1.04,
"grad_norm": 3.951277466183642,
"learning_rate": 3.486055776892431e-06,
"loss": 0.2693,
"sft_loss": 0.023267556354403494,
"step": 175,
"total_loss": 0.030257757987055812,
"value_loss": 0.0699020166444825,
"value_loss_search": 0.21118213117242704,
"value_loss_thought": 0.3480340027017519
},
{
"epoch": 1.07,
"grad_norm": 2.9207350101372365,
"learning_rate": 3.585657370517929e-06,
"loss": 0.262,
"sft_loss": 0.025930221634916963,
"step": 180,
"total_loss": 0.03511905904761079,
"value_loss": 0.09188836992834695,
"value_loss_search": 0.26077198034647325,
"value_loss_thought": 0.4743349766329629
},
{
"epoch": 1.1,
"grad_norm": 5.609912691724805,
"learning_rate": 3.6852589641434267e-06,
"loss": 0.2446,
"sft_loss": 0.024912988301366568,
"step": 185,
"total_loss": 0.03355662206740817,
"value_loss": 0.08643633674801095,
"value_loss_search": 0.24081130932386258,
"value_loss_thought": 0.4506793859531172
},
{
"epoch": 1.13,
"grad_norm": 3.5622387501649775,
"learning_rate": 3.7848605577689246e-06,
"loss": 0.2466,
"sft_loss": 0.028595651406794785,
"step": 190,
"total_loss": 0.0361521876320694,
"value_loss": 0.07556536267784394,
"value_loss_search": 0.22485005248195195,
"value_loss_thought": 0.37967284960468534
},
{
"epoch": 1.16,
"grad_norm": 5.1634897659031616,
"learning_rate": 3.884462151394423e-06,
"loss": 0.23,
"sft_loss": 0.02528013151604682,
"step": 195,
"total_loss": 0.03363830259149836,
"value_loss": 0.08358170948340557,
"value_loss_search": 0.25611529394162746,
"value_loss_thought": 0.4125383788938052
},
{
"epoch": 1.19,
"grad_norm": 2.0650765438648104,
"learning_rate": 3.98406374501992e-06,
"loss": 0.245,
"sft_loss": 0.016848266730085014,
"step": 200,
"total_loss": 0.026103121823689436,
"value_loss": 0.09254854982718826,
"value_loss_search": 0.3132247169883044,
"value_loss_thought": 0.4271636780547851
},
{
"epoch": 1.22,
"grad_norm": 5.039293863721038,
"learning_rate": 4.083665338645419e-06,
"loss": 0.2528,
"sft_loss": 0.02782264384441078,
"step": 205,
"total_loss": 0.03652619049535133,
"value_loss": 0.08703546431788708,
"value_loss_search": 0.24252449526474038,
"value_loss_thought": 0.453759221357177
},
{
"epoch": 1.25,
"grad_norm": 4.004833085846898,
"learning_rate": 4.183266932270917e-06,
"loss": 0.2485,
"sft_loss": 0.03399158006068319,
"step": 210,
"total_loss": 0.04147085763215728,
"value_loss": 0.07479277592756262,
"value_loss_search": 0.24146745675992634,
"value_loss_thought": 0.3568747464043554
},
{
"epoch": 1.28,
"grad_norm": 2.2900194027614567,
"learning_rate": 4.282868525896415e-06,
"loss": 0.2386,
"sft_loss": 0.030579356662929057,
"step": 215,
"total_loss": 0.0377965772360767,
"value_loss": 0.07217220571910729,
"value_loss_search": 0.23593004742797347,
"value_loss_thought": 0.34144759898772464
},
{
"epoch": 1.31,
"grad_norm": 2.2530806573737077,
"learning_rate": 4.382470119521913e-06,
"loss": 0.2305,
"sft_loss": 0.02292049501556903,
"step": 220,
"total_loss": 0.028867545309185515,
"value_loss": 0.05947050442919135,
"value_loss_search": 0.17588968106138053,
"value_loss_thought": 0.2998743573494721
},
{
"epoch": 1.34,
"grad_norm": 2.3848100156605976,
"learning_rate": 4.482071713147411e-06,
"loss": 0.2488,
"sft_loss": 0.021163933374918998,
"step": 225,
"total_loss": 0.027522947750139793,
"value_loss": 0.06359014347035555,
"value_loss_search": 0.17336269621831663,
"value_loss_thought": 0.33535845117003193
},
{
"epoch": 1.37,
"grad_norm": 3.155244067655952,
"learning_rate": 4.581673306772908e-06,
"loss": 0.2318,
"sft_loss": 0.021529758046381176,
"step": 230,
"total_loss": 0.02896902564170887,
"value_loss": 0.07439267357622156,
"value_loss_search": 0.21162400761529626,
"value_loss_thought": 0.3835173849183775
},
{
"epoch": 1.4,
"grad_norm": 3.8792764757968086,
"learning_rate": 4.681274900398407e-06,
"loss": 0.2156,
"sft_loss": 0.023555860621854663,
"step": 235,
"total_loss": 0.03241225123529148,
"value_loss": 0.08856390380533412,
"value_loss_search": 0.24428551671026072,
"value_loss_thought": 0.4642257096977119
},
{
"epoch": 1.43,
"grad_norm": 3.4958134022432183,
"learning_rate": 4.780876494023905e-06,
"loss": 0.238,
"sft_loss": 0.024119808338582516,
"step": 240,
"total_loss": 0.02983194561311393,
"value_loss": 0.05712137209266075,
"value_loss_search": 0.17349829038121242,
"value_loss_thought": 0.28347268382058244
},
{
"epoch": 1.46,
"grad_norm": 2.3555543700333303,
"learning_rate": 4.880478087649403e-06,
"loss": 0.2381,
"sft_loss": 0.02051589481998235,
"step": 245,
"total_loss": 0.02669115279750258,
"value_loss": 0.0617525799851137,
"value_loss_search": 0.1842656165907101,
"value_loss_thought": 0.3097550194659561
},
{
"epoch": 1.49,
"grad_norm": 7.0740358168657576,
"learning_rate": 4.980079681274901e-06,
"loss": 0.2514,
"sft_loss": 0.02382271084934473,
"step": 250,
"total_loss": 0.031842488735128426,
"value_loss": 0.08019777825520577,
"value_loss_search": 0.2615329277869023,
"value_loss_thought": 0.38004929782619
},
{
"epoch": 1.52,
"grad_norm": 2.299103577467398,
"learning_rate": 4.9999611473368576e-06,
"loss": 0.2199,
"sft_loss": 0.02000407627783716,
"step": 255,
"total_loss": 0.025701836839834867,
"value_loss": 0.05697760357270454,
"value_loss_search": 0.14788065557549998,
"value_loss_thought": 0.3079401723291085
},
{
"epoch": 1.55,
"grad_norm": 1.6807413310213553,
"learning_rate": 4.999803310462543e-06,
"loss": 0.2232,
"sft_loss": 0.025435299216769635,
"step": 260,
"total_loss": 0.033880134870196345,
"value_loss": 0.08444835591653828,
"value_loss_search": 0.2635558438050339,
"value_loss_thought": 0.4120309994032141
},
{
"epoch": 1.58,
"grad_norm": 3.984061493281701,
"learning_rate": 4.999524068745182e-06,
"loss": 0.2248,
"sft_loss": 0.022567298170179127,
"step": 265,
"total_loss": 0.02977634134258551,
"value_loss": 0.07209043111970459,
"value_loss_search": 0.22659471631785663,
"value_loss_thought": 0.35012873328050775
},
{
"epoch": 1.61,
"grad_norm": 5.581660893537601,
"learning_rate": 4.99912343574636e-06,
"loss": 0.2457,
"sft_loss": 0.02686069840565324,
"step": 270,
"total_loss": 0.033531226357308694,
"value_loss": 0.06670527966834924,
"value_loss_search": 0.21075203863763364,
"value_loss_thought": 0.3228901971851883
},
{
"epoch": 1.64,
"grad_norm": 4.189848962908778,
"learning_rate": 4.998601430923122e-06,
"loss": 0.2437,
"sft_loss": 0.025497494312003256,
"step": 275,
"total_loss": 0.031059039360116003,
"value_loss": 0.0556154465644795,
"value_loss_search": 0.16651217907464116,
"value_loss_thought": 0.27841138996300285
},
{
"epoch": 1.67,
"grad_norm": 1.2840019127815288,
"learning_rate": 4.997958079627029e-06,
"loss": 0.2337,
"sft_loss": 0.02229512729682028,
"step": 280,
"total_loss": 0.02872599834945504,
"value_loss": 0.06430871063357699,
"value_loss_search": 0.15088917155694617,
"value_loss_thought": 0.3635805150745
},
{
"epoch": 1.7,
"grad_norm": 1.9906954863509052,
"learning_rate": 4.997193413102923e-06,
"loss": 0.2358,
"sft_loss": 0.030755233392119406,
"step": 285,
"total_loss": 0.03727109958572328,
"value_loss": 0.06515866482732235,
"value_loss_search": 0.19967410092722276,
"value_loss_thought": 0.3215952147511416
},
{
"epoch": 1.73,
"grad_norm": 2.0542643472869706,
"learning_rate": 4.996307468487414e-06,
"loss": 0.222,
"sft_loss": 0.02293794075958431,
"step": 290,
"total_loss": 0.027633006192809263,
"value_loss": 0.04695065361320303,
"value_loss_search": 0.1386001783932443,
"value_loss_thought": 0.23700505015585804
},
{
"epoch": 1.76,
"grad_norm": 2.1566271207415375,
"learning_rate": 4.995300288807075e-06,
"loss": 0.2254,
"sft_loss": 0.024227931816130876,
"step": 295,
"total_loss": 0.0282421637364223,
"value_loss": 0.04014231975597795,
"value_loss_search": 0.13643495284136406,
"value_loss_thought": 0.18470360402425284
},
{
"epoch": 1.79,
"grad_norm": 1.780628989439151,
"learning_rate": 4.994171922976349e-06,
"loss": 0.226,
"sft_loss": 0.019921383424662053,
"step": 300,
"total_loss": 0.02442953032641526,
"value_loss": 0.045081469729302624,
"value_loss_search": 0.1360888234203003,
"value_loss_thought": 0.22456293730974722
},
{
"epoch": 1.82,
"grad_norm": 2.350607243352896,
"learning_rate": 4.992922425795179e-06,
"loss": 0.2152,
"sft_loss": 0.021342089958488943,
"step": 305,
"total_loss": 0.02731037002786252,
"value_loss": 0.059682800685004624,
"value_loss_search": 0.14473845993270515,
"value_loss_thought": 0.33272394547384465
},
{
"epoch": 1.85,
"grad_norm": 2.476250174714291,
"learning_rate": 4.991551857946343e-06,
"loss": 0.2167,
"sft_loss": 0.024368287762627006,
"step": 310,
"total_loss": 0.02891062788005456,
"value_loss": 0.04542339965519204,
"value_loss_search": 0.13215827879938616,
"value_loss_thought": 0.23122892067385692
},
{
"epoch": 1.88,
"grad_norm": 2.1211906530083304,
"learning_rate": 4.990060285992507e-06,
"loss": 0.2267,
"sft_loss": 0.03224845631048083,
"step": 315,
"total_loss": 0.0367003864619619,
"value_loss": 0.044519302164553666,
"value_loss_search": 0.13228539347510376,
"value_loss_thought": 0.22386902507196282
},
{
"epoch": 1.91,
"grad_norm": 4.051009811709579,
"learning_rate": 4.988447782372996e-06,
"loss": 0.2227,
"sft_loss": 0.015061728050932288,
"step": 320,
"total_loss": 0.020883486873708536,
"value_loss": 0.0582175869083585,
"value_loss_search": 0.18025588636555767,
"value_loss_thought": 0.28548481071411513
},
{
"epoch": 1.94,
"grad_norm": 6.185126993035625,
"learning_rate": 4.986714425400269e-06,
"loss": 0.2242,
"sft_loss": 0.02483751201070845,
"step": 325,
"total_loss": 0.029222130161724636,
"value_loss": 0.043846180192485915,
"value_loss_search": 0.126026460820583,
"value_loss_thought": 0.22474298011511565
},
{
"epoch": 1.97,
"grad_norm": 1.2748032727970502,
"learning_rate": 4.98486029925612e-06,
"loss": 0.2113,
"sft_loss": 0.022788235195912422,
"step": 330,
"total_loss": 0.027279667726634215,
"value_loss": 0.04491432422255457,
"value_loss_search": 0.14298201398346463,
"value_loss_thought": 0.21633257850262452
},
{
"epoch": 2.0,
"grad_norm": 1.552484041484157,
"learning_rate": 4.982885493987595e-06,
"loss": 0.2124,
"sft_loss": 0.02811491028405726,
"step": 335,
"total_loss": 0.032856917950812206,
"value_loss": 0.04742007501154148,
"value_loss_search": 0.15364139593775689,
"value_loss_thought": 0.22571920259069883
},
{
"epoch": 2.03,
"grad_norm": 2.7785428382948316,
"learning_rate": 4.9807901055026054e-06,
"loss": 0.1788,
"sft_loss": 0.018437814386561512,
"step": 340,
"total_loss": 0.02146597366839842,
"value_loss": 0.03028159231407699,
"value_loss_search": 0.10903610251207282,
"value_loss_thought": 0.13321663648093818
},
{
"epoch": 2.06,
"grad_norm": 1.2850611180768896,
"learning_rate": 4.978574235565284e-06,
"loss": 0.1788,
"sft_loss": 0.023076185397803782,
"step": 345,
"total_loss": 0.027342022850689318,
"value_loss": 0.04265837599887164,
"value_loss_search": 0.13071881325704454,
"value_loss_thought": 0.21054819686614792
},
{
"epoch": 2.09,
"grad_norm": 1.3911394308456917,
"learning_rate": 4.976237991791033e-06,
"loss": 0.1719,
"sft_loss": 0.019436489534564318,
"step": 350,
"total_loss": 0.023576461341781395,
"value_loss": 0.041399715623504106,
"value_loss_search": 0.1492293958552068,
"value_loss_thought": 0.18196833048205008
},
{
"epoch": 2.12,
"grad_norm": 1.6481312891558262,
"learning_rate": 4.973781487641303e-06,
"loss": 0.1844,
"sft_loss": 0.025559269287623466,
"step": 355,
"total_loss": 0.02877166439211578,
"value_loss": 0.032123952108486266,
"value_loss_search": 0.11874442657458531,
"value_loss_thought": 0.13824718931678034
},
{
"epoch": 2.15,
"grad_norm": 1.103781032562002,
"learning_rate": 4.9712048424180806e-06,
"loss": 0.1862,
"sft_loss": 0.02381216634530574,
"step": 360,
"total_loss": 0.02632210613701318,
"value_loss": 0.025099397322048845,
"value_loss_search": 0.08361520563119029,
"value_loss_thought": 0.11717997152591124
},
{
"epoch": 2.18,
"grad_norm": 2.0386255202501173,
"learning_rate": 4.968508181258093e-06,
"loss": 0.1637,
"sft_loss": 0.01670523874927312,
"step": 365,
"total_loss": 0.019544158500229968,
"value_loss": 0.028389195861564076,
"value_loss_search": 0.08506083722818403,
"value_loss_thought": 0.14205273159604986
},
{
"epoch": 2.21,
"grad_norm": 1.7600289408433292,
"learning_rate": 4.965691635126737e-06,
"loss": 0.1816,
"sft_loss": 0.0221649584826082,
"step": 370,
"total_loss": 0.02530371010229828,
"value_loss": 0.03138751635051449,
"value_loss_search": 0.099762270176916,
"value_loss_thought": 0.1513378613846726
},
{
"epoch": 2.24,
"grad_norm": 1.2580654310157564,
"learning_rate": 4.962755340811709e-06,
"loss": 0.179,
"sft_loss": 0.01950194430537522,
"step": 375,
"total_loss": 0.022580356493926958,
"value_loss": 0.03078412101158392,
"value_loss_search": 0.09001722024981973,
"value_loss_thought": 0.15625574861769564
},
{
"epoch": 2.27,
"grad_norm": 2.234333438325428,
"learning_rate": 4.959699440916369e-06,
"loss": 0.1749,
"sft_loss": 0.022818994731642304,
"step": 380,
"total_loss": 0.025907937735428276,
"value_loss": 0.030889428492628213,
"value_loss_search": 0.08153837720744832,
"value_loss_thought": 0.16557704737147105
},
{
"epoch": 2.3,
"grad_norm": 1.085143168299747,
"learning_rate": 4.956524083852812e-06,
"loss": 0.1667,
"sft_loss": 0.01802813063841313,
"step": 385,
"total_loss": 0.020953079688115394,
"value_loss": 0.029249489003632334,
"value_loss_search": 0.09198170516392565,
"value_loss_thought": 0.14201420772205892
},
{
"epoch": 2.33,
"grad_norm": 2.242763660758448,
"learning_rate": 4.953229423834662e-06,
"loss": 0.1818,
"sft_loss": 0.011760137742385268,
"step": 390,
"total_loss": 0.015489974882802926,
"value_loss": 0.03729837100072473,
"value_loss_search": 0.08624583505323927,
"value_loss_thought": 0.21214113264650222
},
{
"epoch": 2.36,
"grad_norm": 0.8938036920537223,
"learning_rate": 4.949815620869579e-06,
"loss": 0.1824,
"sft_loss": 0.02469517719000578,
"step": 395,
"total_loss": 0.0275583725127035,
"value_loss": 0.02863195287209237,
"value_loss_search": 0.08686879273809608,
"value_loss_thought": 0.1421868310884747
},
{
"epoch": 2.39,
"grad_norm": 0.7973027480525212,
"learning_rate": 4.946282840751494e-06,
"loss": 0.1769,
"sft_loss": 0.018014212045818566,
"step": 400,
"total_loss": 0.021850849609819534,
"value_loss": 0.03836637459389749,
"value_loss_search": 0.07837754817135192,
"value_loss_thought": 0.22855344959643845
},
{
"epoch": 2.42,
"grad_norm": 1.2529435303540792,
"learning_rate": 4.942631255052551e-06,
"loss": 0.1728,
"sft_loss": 0.015492680622264743,
"step": 405,
"total_loss": 0.018379339482680734,
"value_loss": 0.028866587373761375,
"value_loss_search": 0.09674510946805412,
"value_loss_thought": 0.1341875916292338
},
{
"epoch": 2.45,
"grad_norm": 0.7134673687996919,
"learning_rate": 4.938861041114779e-06,
"loss": 0.1664,
"sft_loss": 0.01866905202623457,
"step": 410,
"total_loss": 0.020626170139428267,
"value_loss": 0.019571180698403624,
"value_loss_search": 0.06965761288097383,
"value_loss_thought": 0.08691183333139633
},
{
"epoch": 2.48,
"grad_norm": 0.7529741551559901,
"learning_rate": 4.934972382041475e-06,
"loss": 0.1849,
"sft_loss": 0.019827575120143593,
"step": 415,
"total_loss": 0.022788787103309005,
"value_loss": 0.029612120029923972,
"value_loss_search": 0.0888989626518196,
"value_loss_thought": 0.1479980006653932
},
{
"epoch": 2.51,
"grad_norm": 1.7995496682865593,
"learning_rate": 4.9309654666883165e-06,
"loss": 0.179,
"sft_loss": 0.020990492962300777,
"step": 420,
"total_loss": 0.024445655822250955,
"value_loss": 0.03455162828067841,
"value_loss_search": 0.08684888708735343,
"value_loss_thought": 0.18956413638661615
},
{
"epoch": 2.54,
"grad_norm": 1.670690442471347,
"learning_rate": 4.926840489654184e-06,
"loss": 0.1894,
"sft_loss": 0.02077859474811703,
"step": 425,
"total_loss": 0.024163767833670136,
"value_loss": 0.033851731998584,
"value_loss_search": 0.08538484702294227,
"value_loss_thought": 0.18542900827596895
},
{
"epoch": 2.57,
"grad_norm": 1.8704656616088737,
"learning_rate": 4.922597651271716e-06,
"loss": 0.1927,
"sft_loss": 0.02478037038818002,
"step": 430,
"total_loss": 0.02731952823942265,
"value_loss": 0.025391577205482463,
"value_loss_search": 0.08109033311634448,
"value_loss_thought": 0.12204228106857044
},
{
"epoch": 2.6,
"grad_norm": 1.3917455631379416,
"learning_rate": 4.918237157597574e-06,
"loss": 0.1792,
"sft_loss": 0.01933148135431111,
"step": 435,
"total_loss": 0.022168212975520872,
"value_loss": 0.02836731561783381,
"value_loss_search": 0.0816025677968355,
"value_loss_thought": 0.1453359603422541
},
{
"epoch": 2.63,
"grad_norm": 1.0019606814299877,
"learning_rate": 4.913759220402441e-06,
"loss": 0.1911,
"sft_loss": 0.019400009652599692,
"step": 440,
"total_loss": 0.02239533064052921,
"value_loss": 0.029953211315478255,
"value_loss_search": 0.10217987555329273,
"value_loss_thought": 0.13744581426071817
},
{
"epoch": 2.66,
"grad_norm": 1.2317965960561872,
"learning_rate": 4.9091640571607295e-06,
"loss": 0.1827,
"sft_loss": 0.016407015430741013,
"step": 445,
"total_loss": 0.018946515550851473,
"value_loss": 0.0253950018544856,
"value_loss_search": 0.07369275801845561,
"value_loss_thought": 0.12946725602414516
},
{
"epoch": 2.69,
"grad_norm": 0.683140754700333,
"learning_rate": 4.9044518910400285e-06,
"loss": 0.1676,
"sft_loss": 0.012768425536341966,
"step": 450,
"total_loss": 0.014526197344821412,
"value_loss": 0.017577718090615237,
"value_loss_search": 0.04510120849572559,
"value_loss_thought": 0.09552053666211577
},
{
"epoch": 2.72,
"grad_norm": 1.0907748085551225,
"learning_rate": 4.899622950890258e-06,
"loss": 0.1837,
"sft_loss": 0.017968191439285873,
"step": 455,
"total_loss": 0.02001593358502305,
"value_loss": 0.020477420029783387,
"value_loss_search": 0.04958656680909144,
"value_loss_thought": 0.11423279177743098
},
{
"epoch": 2.75,
"grad_norm": 1.3850337152675154,
"learning_rate": 4.894677471232556e-06,
"loss": 0.1908,
"sft_loss": 0.01755859658587724,
"step": 460,
"total_loss": 0.02234408485255699,
"value_loss": 0.04785487991980517,
"value_loss_search": 0.1180493140761314,
"value_loss_thought": 0.26478972275981505
},
{
"epoch": 2.78,
"grad_norm": 1.1181978133661836,
"learning_rate": 4.889615692247893e-06,
"loss": 0.1841,
"sft_loss": 0.021812843438237906,
"step": 465,
"total_loss": 0.025394389864322874,
"value_loss": 0.035815464633651574,
"value_loss_search": 0.09992547294255019,
"value_loss_thought": 0.18659824333662983
},
{
"epoch": 2.8,
"grad_norm": 2.0759753145829025,
"learning_rate": 4.884437859765403e-06,
"loss": 0.1824,
"sft_loss": 0.026800552336499094,
"step": 470,
"total_loss": 0.030636231325843255,
"value_loss": 0.03835678935120086,
"value_loss_search": 0.08862242994021016,
"value_loss_thought": 0.2182318838626088
},
{
"epoch": 2.83,
"grad_norm": 1.4431529345376723,
"learning_rate": 4.879144225250445e-06,
"loss": 0.1816,
"sft_loss": 0.017374001140706242,
"step": 475,
"total_loss": 0.01985499007805629,
"value_loss": 0.02480988986644661,
"value_loss_search": 0.06347666834494703,
"value_loss_thought": 0.1350024510633375
},
{
"epoch": 2.86,
"grad_norm": 1.4247925282572933,
"learning_rate": 4.873735045792395e-06,
"loss": 0.1818,
"sft_loss": 0.01573096849024296,
"step": 480,
"total_loss": 0.018171964960481547,
"value_loss": 0.02440996536379316,
"value_loss_search": 0.06245305103024066,
"value_loss_thought": 0.13282667167441103
},
{
"epoch": 2.89,
"grad_norm": 0.9971468267328366,
"learning_rate": 4.868210584092151e-06,
"loss": 0.1467,
"sft_loss": 0.014233330194838346,
"step": 485,
"total_loss": 0.017078536024837377,
"value_loss": 0.028452056018250006,
"value_loss_search": 0.08309140489452602,
"value_loss_thought": 0.14452504066284746
},
{
"epoch": 2.92,
"grad_norm": 1.023944906300116,
"learning_rate": 4.862571108449387e-06,
"loss": 0.1698,
"sft_loss": 0.015431807213462889,
"step": 490,
"total_loss": 0.01847703835471748,
"value_loss": 0.030452309869269813,
"value_loss_search": 0.068971808032029,
"value_loss_thought": 0.1746466698442873
},
{
"epoch": 2.95,
"grad_norm": 2.461096472197934,
"learning_rate": 4.856816892749512e-06,
"loss": 0.1971,
"sft_loss": 0.023769452376291154,
"step": 495,
"total_loss": 0.02749020796882178,
"value_loss": 0.03720755761728469,
"value_loss_search": 0.09986964019751668,
"value_loss_thought": 0.19779082740396917
},
{
"epoch": 2.98,
"grad_norm": 3.0866796216704744,
"learning_rate": 4.850948216450374e-06,
"loss": 0.1685,
"sft_loss": 0.014484391221776605,
"step": 500,
"total_loss": 0.017671175975374355,
"value_loss": 0.03186784757053829,
"value_loss_search": 0.08305875552287034,
"value_loss_thought": 0.17188402622778085
},
{
"epoch": 3.01,
"grad_norm": 1.5870923337393499,
"learning_rate": 4.844965364568688e-06,
"loss": 0.1597,
"sft_loss": 0.012694860575720668,
"step": 505,
"total_loss": 0.014846146440197572,
"value_loss": 0.021512858869391492,
"value_loss_search": 0.0868586509443503,
"value_loss_thought": 0.08524421998045
},
{
"epoch": 3.04,
"grad_norm": 1.2125544575036498,
"learning_rate": 4.838868627666191e-06,
"loss": 0.1309,
"sft_loss": 0.0139304670272395,
"step": 510,
"total_loss": 0.015479093678698064,
"value_loss": 0.01548626560065145,
"value_loss_search": 0.04746301869455181,
"value_loss_thought": 0.0764271073458076
},
{
"epoch": 3.07,
"grad_norm": 2.334834842181477,
"learning_rate": 4.8326583018355325e-06,
"loss": 0.1387,
"sft_loss": 0.017326009750831872,
"step": 515,
"total_loss": 0.018803953268570695,
"value_loss": 0.014779433516605423,
"value_loss_search": 0.04713020703025848,
"value_loss_thought": 0.07110526093983935
},
{
"epoch": 3.1,
"grad_norm": 1.5097098181886008,
"learning_rate": 4.826334688685895e-06,
"loss": 0.1436,
"sft_loss": 0.015178992017172276,
"step": 520,
"total_loss": 0.01744569096995292,
"value_loss": 0.022666989314620876,
"value_loss_search": 0.056544199488735104,
"value_loss_thought": 0.12479171633535771
},
{
"epoch": 3.13,
"grad_norm": 1.3535205448949577,
"learning_rate": 4.819898095328346e-06,
"loss": 0.1298,
"sft_loss": 0.012230370636098086,
"step": 525,
"total_loss": 0.013584458573981805,
"value_loss": 0.013540879723450416,
"value_loss_search": 0.05891674056322245,
"value_loss_thought": 0.04941029600959155
},
{
"epoch": 3.16,
"grad_norm": 1.3125271270813312,
"learning_rate": 4.8133488343609225e-06,
"loss": 0.1235,
"sft_loss": 0.011256256676279008,
"step": 530,
"total_loss": 0.01235277638277239,
"value_loss": 0.010965196802611387,
"value_loss_search": 0.03379480599472799,
"value_loss_thought": 0.05392676859810308
},
{
"epoch": 3.19,
"grad_norm": 1.2773968571118655,
"learning_rate": 4.8066872238534475e-06,
"loss": 0.1315,
"sft_loss": 0.015837551723234356,
"step": 535,
"total_loss": 0.01701334770821745,
"value_loss": 0.011757960020167957,
"value_loss_search": 0.04275373071531021,
"value_loss_thought": 0.05130994958924475
},
{
"epoch": 3.22,
"grad_norm": 2.250161610850135,
"learning_rate": 4.799913587332087e-06,
"loss": 0.1336,
"sft_loss": 0.01434246387798339,
"step": 540,
"total_loss": 0.017333269701339304,
"value_loss": 0.02990805795383835,
"value_loss_search": 0.1112344439959088,
"value_loss_thought": 0.1280300162234198
},
{
"epoch": 3.25,
"grad_norm": 1.7005049809514607,
"learning_rate": 4.793028253763633e-06,
"loss": 0.1382,
"sft_loss": 0.017506125732325017,
"step": 545,
"total_loss": 0.018909330308576954,
"value_loss": 0.014032045881458543,
"value_loss_search": 0.04037052274323969,
"value_loss_thought": 0.0718858446669401
},
{
"epoch": 3.28,
"grad_norm": 0.7583090366866635,
"learning_rate": 4.786031557539532e-06,
"loss": 0.1302,
"sft_loss": 0.01899501702282578,
"step": 550,
"total_loss": 0.020867432708973864,
"value_loss": 0.018724157162853316,
"value_loss_search": 0.0525303708315505,
"value_loss_thought": 0.09726288783901964
},
{
"epoch": 3.31,
"grad_norm": 0.6780107958188984,
"learning_rate": 4.7789238384596394e-06,
"loss": 0.1332,
"sft_loss": 0.01216251152800396,
"step": 555,
"total_loss": 0.013824499693544112,
"value_loss": 0.016619880075199944,
"value_loss_search": 0.043466050122447086,
"value_loss_thought": 0.08949299015621363
},
{
"epoch": 3.34,
"grad_norm": 1.186243776959505,
"learning_rate": 4.771705441715722e-06,
"loss": 0.1337,
"sft_loss": 0.012400240125134587,
"step": 560,
"total_loss": 0.015292362061109089,
"value_loss": 0.02892121910044807,
"value_loss_search": 0.07066335178838017,
"value_loss_thought": 0.160706402354117
},
{
"epoch": 3.37,
"grad_norm": 0.8074975390166714,
"learning_rate": 4.76437671787469e-06,
"loss": 0.1276,
"sft_loss": 0.013388467975892127,
"step": 565,
"total_loss": 0.014532541646792652,
"value_loss": 0.011440736869690226,
"value_loss_search": 0.042682257217265374,
"value_loss_thought": 0.04884363801356813
},
{
"epoch": 3.4,
"grad_norm": 1.1536407465687832,
"learning_rate": 4.756938022861575e-06,
"loss": 0.1361,
"sft_loss": 0.014692733390256762,
"step": 570,
"total_loss": 0.016363070430702464,
"value_loss": 0.01670336974376596,
"value_loss_search": 0.05988950296840585,
"value_loss_thought": 0.07373745448749106
},
{
"epoch": 3.43,
"grad_norm": 1.296361708786197,
"learning_rate": 4.7493897179422366e-06,
"loss": 0.1262,
"sft_loss": 0.014538243343122304,
"step": 575,
"total_loss": 0.016324132579029538,
"value_loss": 0.017858891680498347,
"value_loss_search": 0.043000358385506844,
"value_loss_thought": 0.09987077550104004
},
{
"epoch": 3.46,
"grad_norm": 0.7473511902604624,
"learning_rate": 4.741732169705829e-06,
"loss": 0.128,
"sft_loss": 0.01885277854744345,
"step": 580,
"total_loss": 0.020614391867351856,
"value_loss": 0.01761613485025464,
"value_loss_search": 0.07391770112133145,
"value_loss_thought": 0.06701137741147249
},
{
"epoch": 3.49,
"grad_norm": 0.8672128287076202,
"learning_rate": 4.733965750046987e-06,
"loss": 0.1306,
"sft_loss": 0.011551224719733,
"step": 585,
"total_loss": 0.012892319760794635,
"value_loss": 0.013410949158787844,
"value_loss_search": 0.044143242872939935,
"value_loss_thought": 0.06314435001149832
},
{
"epoch": 3.52,
"grad_norm": 1.1458177098839266,
"learning_rate": 4.72609083614777e-06,
"loss": 0.1292,
"sft_loss": 0.014341436792165041,
"step": 590,
"total_loss": 0.015776935562007564,
"value_loss": 0.014354987322531088,
"value_loss_search": 0.05364567942300482,
"value_loss_thought": 0.06119421870671431
},
{
"epoch": 3.55,
"grad_norm": 0.9268770278205384,
"learning_rate": 4.71810781045934e-06,
"loss": 0.1297,
"sft_loss": 0.015257505606859923,
"step": 595,
"total_loss": 0.017370533025075473,
"value_loss": 0.02113027337927633,
"value_loss_search": 0.059170720773727224,
"value_loss_thought": 0.1098714638722413
},
{
"epoch": 3.58,
"grad_norm": 0.868085256277253,
"learning_rate": 4.710017060683396e-06,
"loss": 0.1425,
"sft_loss": 0.01423517488874495,
"step": 600,
"total_loss": 0.01731622647575932,
"value_loss": 0.03081051432359345,
"value_loss_search": 0.0980118739993486,
"value_loss_thought": 0.14847224191835268
},
{
"epoch": 3.61,
"grad_norm": 1.0537209668511847,
"learning_rate": 4.701818979753331e-06,
"loss": 0.1321,
"sft_loss": 0.014892896311357618,
"step": 605,
"total_loss": 0.016185236236145782,
"value_loss": 0.012923399927421997,
"value_loss_search": 0.04884319881856527,
"value_loss_thought": 0.05454400067619645
},
{
"epoch": 3.64,
"grad_norm": 0.8700570072330364,
"learning_rate": 4.693513965815163e-06,
"loss": 0.1508,
"sft_loss": 0.019580099708400668,
"step": 610,
"total_loss": 0.02092106218860863,
"value_loss": 0.013409624124460607,
"value_loss_search": 0.04490554911869822,
"value_loss_thought": 0.06237144449660263
},
{
"epoch": 3.67,
"grad_norm": 1.2788774174216047,
"learning_rate": 4.6851024222081905e-06,
"loss": 0.1375,
"sft_loss": 0.014192894287407398,
"step": 615,
"total_loss": 0.015467491799489608,
"value_loss": 0.012745974849167396,
"value_loss_search": 0.05650571903670425,
"value_loss_thought": 0.045462077876436524
},
{
"epoch": 3.7,
"grad_norm": 1.1384751682462817,
"learning_rate": 4.676584757445406e-06,
"loss": 0.1391,
"sft_loss": 0.01579129050951451,
"step": 620,
"total_loss": 0.017925446468007068,
"value_loss": 0.021341558844778775,
"value_loss_search": 0.04930673633061815,
"value_loss_thought": 0.12142573465496298
},
{
"epoch": 3.73,
"grad_norm": 0.9386975353990433,
"learning_rate": 4.667961385193656e-06,
"loss": 0.1358,
"sft_loss": 0.01762553579173982,
"step": 625,
"total_loss": 0.01908148711366948,
"value_loss": 0.014559512600328617,
"value_loss_search": 0.0564095867106289,
"value_loss_thought": 0.06006651317602518
},
{
"epoch": 3.76,
"grad_norm": 1.09540391640531,
"learning_rate": 4.659232724253553e-06,
"loss": 0.1426,
"sft_loss": 0.012116698501631617,
"step": 630,
"total_loss": 0.013968724870528604,
"value_loss": 0.01852026394344648,
"value_loss_search": 0.037771890809654,
"value_loss_thought": 0.11039022141931128
},
{
"epoch": 3.79,
"grad_norm": 1.1403346821597187,
"learning_rate": 4.650399198539132e-06,
"loss": 0.14,
"sft_loss": 0.017072572163306175,
"step": 635,
"total_loss": 0.018839041521141554,
"value_loss": 0.017664692708285655,
"value_loss_search": 0.0408900124222555,
"value_loss_thought": 0.10042752822591865
},
{
"epoch": 3.82,
"grad_norm": 0.8573467809405049,
"learning_rate": 4.641461237057267e-06,
"loss": 0.1402,
"sft_loss": 0.01425269797910005,
"step": 640,
"total_loss": 0.01625731455909545,
"value_loss": 0.020046164246105037,
"value_loss_search": 0.06718737064234119,
"value_loss_thought": 0.09318194256302377
},
{
"epoch": 3.85,
"grad_norm": 0.6970362791399626,
"learning_rate": 4.632419273886835e-06,
"loss": 0.1257,
"sft_loss": 0.013124658446758985,
"step": 645,
"total_loss": 0.014949945608714188,
"value_loss": 0.01825287001252036,
"value_loss_search": 0.047629779761018655,
"value_loss_thought": 0.09839317841679077
},
{
"epoch": 3.88,
"grad_norm": 1.0570192724917926,
"learning_rate": 4.62327374815763e-06,
"loss": 0.1318,
"sft_loss": 0.014157434459775686,
"step": 650,
"total_loss": 0.016517047015622666,
"value_loss": 0.023596125619087617,
"value_loss_search": 0.06047542081159918,
"value_loss_thought": 0.12829358246017364
},
{
"epoch": 3.91,
"grad_norm": 1.4519497262292758,
"learning_rate": 4.614025104029046e-06,
"loss": 0.1304,
"sft_loss": 0.015446552366483957,
"step": 655,
"total_loss": 0.017752421715067614,
"value_loss": 0.02305869280771731,
"value_loss_search": 0.05821381665200533,
"value_loss_thought": 0.12625572479846597
},
{
"epoch": 3.94,
"grad_norm": 1.4038621587755458,
"learning_rate": 4.6046737906684955e-06,
"loss": 0.1222,
"sft_loss": 0.013910213205963372,
"step": 660,
"total_loss": 0.016714950121104265,
"value_loss": 0.028047367616863994,
"value_loss_search": 0.07310631623822701,
"value_loss_thought": 0.15127262340945435
},
{
"epoch": 3.97,
"grad_norm": 1.5045007836682485,
"learning_rate": 4.5952202622296015e-06,
"loss": 0.1354,
"sft_loss": 0.013794716726988555,
"step": 665,
"total_loss": 0.015795043228149553,
"value_loss": 0.020003264624756413,
"value_loss_search": 0.0463893650089517,
"value_loss_thought": 0.11363675205575419
},
{
"epoch": 4.0,
"grad_norm": 1.1345147085500709,
"learning_rate": 4.585664977830142e-06,
"loss": 0.1411,
"sft_loss": 0.01729184603318572,
"step": 670,
"total_loss": 0.01970745405710659,
"value_loss": 0.024156079764179595,
"value_loss_search": 0.04846977311616456,
"value_loss_thought": 0.14477886538206802
},
{
"epoch": 4.03,
"grad_norm": 0.7700098195196915,
"learning_rate": 4.576008401529746e-06,
"loss": 0.0925,
"sft_loss": 0.00861083798808977,
"step": 675,
"total_loss": 0.009520014304735014,
"value_loss": 0.009091763565447764,
"value_loss_search": 0.02745309268981373,
"value_loss_thought": 0.04528101538708142
},
{
"epoch": 4.06,
"grad_norm": 1.2543052602388458,
"learning_rate": 4.566251002307363e-06,
"loss": 0.0957,
"sft_loss": 0.009448495891410858,
"step": 680,
"total_loss": 0.010420851569779187,
"value_loss": 0.00972355670351135,
"value_loss_search": 0.043272700070008344,
"value_loss_thought": 0.03451575428043725
},
{
"epoch": 4.09,
"grad_norm": 0.7516221051255656,
"learning_rate": 4.556393254038486e-06,
"loss": 0.0866,
"sft_loss": 0.00772422740701586,
"step": 685,
"total_loss": 0.008484165449101511,
"value_loss": 0.007599379828752717,
"value_loss_search": 0.028828184264500577,
"value_loss_thought": 0.03196685446855554
},
{
"epoch": 4.12,
"grad_norm": 0.9845886373213341,
"learning_rate": 4.546435635472133e-06,
"loss": 0.0995,
"sft_loss": 0.0058452394208870825,
"step": 690,
"total_loss": 0.006937743502530225,
"value_loss": 0.010925040280527298,
"value_loss_search": 0.038594244975945456,
"value_loss_thought": 0.04880607676673208
},
{
"epoch": 4.15,
"grad_norm": 0.9063088407123128,
"learning_rate": 4.536378630207598e-06,
"loss": 0.0863,
"sft_loss": 0.009303204133175313,
"step": 695,
"total_loss": 0.010333730792467577,
"value_loss": 0.01030526671976304,
"value_loss_search": 0.04375397186494183,
"value_loss_thought": 0.038688162007019855
},
{
"epoch": 4.18,
"grad_norm": 0.6582700642646613,
"learning_rate": 4.526222726670966e-06,
"loss": 0.0954,
"sft_loss": 0.011040500248782337,
"step": 700,
"total_loss": 0.01200565958547486,
"value_loss": 0.009651594169963574,
"value_loss_search": 0.03989513861957903,
"value_loss_thought": 0.037317614558742204
},
{
"epoch": 4.21,
"grad_norm": 0.743159814030366,
"learning_rate": 4.515968418091394e-06,
"loss": 0.0899,
"sft_loss": 0.010487070470117033,
"step": 705,
"total_loss": 0.011825196649658665,
"value_loss": 0.013381260839241804,
"value_loss_search": 0.036946902065199086,
"value_loss_thought": 0.07010318333050236
},
{
"epoch": 4.24,
"grad_norm": 0.9990248193841446,
"learning_rate": 4.505616202477152e-06,
"loss": 0.1002,
"sft_loss": 0.009330611897166819,
"step": 710,
"total_loss": 0.009855728735766433,
"value_loss": 0.0052511682242766256,
"value_loss_search": 0.02117164042309696,
"value_loss_thought": 0.020837705221492798
},
{
"epoch": 4.27,
"grad_norm": 0.7065067498607398,
"learning_rate": 4.49516658259144e-06,
"loss": 0.1018,
"sft_loss": 0.013342563062906265,
"step": 715,
"total_loss": 0.014073112735923132,
"value_loss": 0.0073054967775533445,
"value_loss_search": 0.023805043454274255,
"value_loss_thought": 0.03463893002911504
},
{
"epoch": 4.3,
"grad_norm": 0.6847509331487238,
"learning_rate": 4.48462006592797e-06,
"loss": 0.0993,
"sft_loss": 0.010668217262718827,
"step": 720,
"total_loss": 0.011838797037410132,
"value_loss": 0.011705797309559784,
"value_loss_search": 0.047429115066006486,
"value_loss_thought": 0.046217263146081676
},
{
"epoch": 4.33,
"grad_norm": 0.6685576496831016,
"learning_rate": 4.473977164686321e-06,
"loss": 0.0944,
"sft_loss": 0.008604107843711972,
"step": 725,
"total_loss": 0.009628339805681207,
"value_loss": 0.010242320024417495,
"value_loss_search": 0.035196531142617005,
"value_loss_thought": 0.04674202849655558
},
{
"epoch": 4.36,
"grad_norm": 0.75658777055452,
"learning_rate": 4.46323839574706e-06,
"loss": 0.1034,
"sft_loss": 0.010033530904911459,
"step": 730,
"total_loss": 0.011016872639970643,
"value_loss": 0.009833417008576362,
"value_loss_search": 0.02881108484938295,
"value_loss_thought": 0.04985625083095328
},
{
"epoch": 4.39,
"grad_norm": 0.688131213767825,
"learning_rate": 4.45240428064664e-06,
"loss": 0.095,
"sft_loss": 0.013836181082297117,
"step": 735,
"total_loss": 0.014805314792602076,
"value_loss": 0.009691335982915916,
"value_loss_search": 0.036203794245648166,
"value_loss_thought": 0.04132689285597735
},
{
"epoch": 4.42,
"grad_norm": 0.7096504880990531,
"learning_rate": 4.4414753455520795e-06,
"loss": 0.0982,
"sft_loss": 0.009608610440045596,
"step": 740,
"total_loss": 0.010440762112193625,
"value_loss": 0.00832151709196296,
"value_loss_search": 0.029671950675117388,
"value_loss_thought": 0.036900186177172146
},
{
"epoch": 4.45,
"grad_norm": 0.6217612986336695,
"learning_rate": 4.430452121235396e-06,
"loss": 0.0971,
"sft_loss": 0.012536874134093523,
"step": 745,
"total_loss": 0.014267633290864978,
"value_loss": 0.017307592170084263,
"value_loss_search": 0.034450266263445425,
"value_loss_thought": 0.10401047060558995
},
{
"epoch": 4.48,
"grad_norm": 0.797630098293788,
"learning_rate": 4.419335143047834e-06,
"loss": 0.1029,
"sft_loss": 0.014243904023896903,
"step": 750,
"total_loss": 0.015265003470494776,
"value_loss": 0.010210994727276556,
"value_loss_search": 0.032982522548752516,
"value_loss_thought": 0.048705436136879145
},
{
"epoch": 4.51,
"grad_norm": 0.6961134075635874,
"learning_rate": 4.408124950893868e-06,
"loss": 0.095,
"sft_loss": 0.012445987621322274,
"step": 755,
"total_loss": 0.01341653695722016,
"value_loss": 0.009705493019964706,
"value_loss_search": 0.03983713596752523,
"value_loss_thought": 0.03780680882375691
},
{
"epoch": 4.54,
"grad_norm": 0.5762572982567685,
"learning_rate": 4.396822089204981e-06,
"loss": 0.0998,
"sft_loss": 0.011862749280408025,
"step": 760,
"total_loss": 0.013249826465107617,
"value_loss": 0.013870771408619476,
"value_loss_search": 0.0406148220283626,
"value_loss_thought": 0.07035134897487297
},
{
"epoch": 4.57,
"grad_norm": 0.7824308185518036,
"learning_rate": 4.3854271069132195e-06,
"loss": 0.1015,
"sft_loss": 0.012830629444215447,
"step": 765,
"total_loss": 0.01348854236812258,
"value_loss": 0.006579129521428228,
"value_loss_search": 0.02838464450221636,
"value_loss_thought": 0.024248391312721652
},
{
"epoch": 4.6,
"grad_norm": 1.115871263491779,
"learning_rate": 4.373940557424537e-06,
"loss": 0.0929,
"sft_loss": 0.013013543572742491,
"step": 770,
"total_loss": 0.014466597687578542,
"value_loss": 0.014530540375199052,
"value_loss_search": 0.044251333865793184,
"value_loss_thought": 0.07199298751397691
},
{
"epoch": 4.63,
"grad_norm": 0.5639360855348526,
"learning_rate": 4.36236299859192e-06,
"loss": 0.0942,
"sft_loss": 0.010940996720455587,
"step": 775,
"total_loss": 0.01194770065769717,
"value_loss": 0.010067038443867204,
"value_loss_search": 0.032814532905274515,
"value_loss_thought": 0.04772177415952683
},
{
"epoch": 4.65,
"grad_norm": 1.1373613205181072,
"learning_rate": 4.350694992688289e-06,
"loss": 0.1003,
"sft_loss": 0.009771948284469544,
"step": 780,
"total_loss": 0.010786435697650632,
"value_loss": 0.010144873889385054,
"value_loss_search": 0.04210830284959002,
"value_loss_thought": 0.03905068875028519
},
{
"epoch": 4.68,
"grad_norm": 0.7728548735492248,
"learning_rate": 4.338937106379199e-06,
"loss": 0.0986,
"sft_loss": 0.01460006288252771,
"step": 785,
"total_loss": 0.015538786767115198,
"value_loss": 0.009387238657767227,
"value_loss_search": 0.03280571626974051,
"value_loss_thought": 0.04229219339322299
},
{
"epoch": 4.71,
"grad_norm": 0.8732356620640505,
"learning_rate": 4.32708991069531e-06,
"loss": 0.0941,
"sft_loss": 0.009909180679824204,
"step": 790,
"total_loss": 0.01077013838946641,
"value_loss": 0.0086095773167699,
"value_loss_search": 0.0318576935078454,
"value_loss_thought": 0.03701892458407201
},
{
"epoch": 4.74,
"grad_norm": 1.143053369270145,
"learning_rate": 4.315153981004666e-06,
"loss": 0.0966,
"sft_loss": 0.009806250128895045,
"step": 795,
"total_loss": 0.011583962598513154,
"value_loss": 0.017777125288466776,
"value_loss_search": 0.040363578098720154,
"value_loss_thought": 0.1018534237449785
},
{
"epoch": 4.77,
"grad_norm": 0.7780179009340498,
"learning_rate": 4.3031298969847406e-06,
"loss": 0.1002,
"sft_loss": 0.009359034500084818,
"step": 800,
"total_loss": 0.010968359952437367,
"value_loss": 0.016093254049246754,
"value_loss_search": 0.05174213330789144,
"value_loss_thought": 0.07700389907186037
},
{
"epoch": 4.8,
"grad_norm": 0.7183429149076678,
"learning_rate": 4.29101824259429e-06,
"loss": 0.1018,
"sft_loss": 0.009059342555701733,
"step": 805,
"total_loss": 0.010009716682918678,
"value_loss": 0.009503741757498574,
"value_loss_search": 0.03197408163345017,
"value_loss_thought": 0.0440558526033783
},
{
"epoch": 4.83,
"grad_norm": 0.6507483126573996,
"learning_rate": 4.2788196060449925e-06,
"loss": 0.1002,
"sft_loss": 0.010762089781928807,
"step": 810,
"total_loss": 0.012873523510279483,
"value_loss": 0.021114336799837475,
"value_loss_search": 0.04117565880492293,
"value_loss_thought": 0.12773903450543003
},
{
"epoch": 4.86,
"grad_norm": 0.8117830572861622,
"learning_rate": 4.266534579772881e-06,
"loss": 0.0998,
"sft_loss": 0.016996108298189937,
"step": 815,
"total_loss": 0.01824291221478802,
"value_loss": 0.012468039073382897,
"value_loss_search": 0.026130347241348773,
"value_loss_thought": 0.07361396466958467
},
{
"epoch": 4.89,
"grad_norm": 0.9626098886691061,
"learning_rate": 4.254163760409571e-06,
"loss": 0.1041,
"sft_loss": 0.01157297370955348,
"step": 820,
"total_loss": 0.012655413702026408,
"value_loss": 0.01082440062873502,
"value_loss_search": 0.029132241165916638,
"value_loss_thought": 0.057462964772071246
},
{
"epoch": 4.92,
"grad_norm": 0.6101284730372293,
"learning_rate": 4.2417077487532835e-06,
"loss": 0.0917,
"sft_loss": 0.00924121611751616,
"step": 825,
"total_loss": 0.010044124071836791,
"value_loss": 0.008029079741447731,
"value_loss_search": 0.031784425488490343,
"value_loss_thought": 0.032448212833514845
},
{
"epoch": 4.95,
"grad_norm": 0.8024833457256252,
"learning_rate": 4.229167149739667e-06,
"loss": 0.094,
"sft_loss": 0.00918948817998171,
"step": 830,
"total_loss": 0.010426052451214219,
"value_loss": 0.01236564262903812,
"value_loss_search": 0.03679989460033539,
"value_loss_thought": 0.062125246099412834
},
{
"epoch": 4.98,
"grad_norm": 1.1622516088977335,
"learning_rate": 4.216542572412423e-06,
"loss": 0.0952,
"sft_loss": 0.007891789707355202,
"step": 835,
"total_loss": 0.00946709308821454,
"value_loss": 0.015753033644841707,
"value_loss_search": 0.03111598624600447,
"value_loss_thought": 0.0949082830469706
},
{
"epoch": 5.01,
"grad_norm": 0.6908442768231199,
"learning_rate": 4.203834629893719e-06,
"loss": 0.0811,
"sft_loss": 0.007326198380906135,
"step": 840,
"total_loss": 0.008643819743883795,
"value_loss": 0.013176213806036684,
"value_loss_search": 0.05124346678378515,
"value_loss_thought": 0.0541662441482913
},
{
"epoch": 5.04,
"grad_norm": 0.7901490430318995,
"learning_rate": 4.19104393935442e-06,
"loss": 0.0662,
"sft_loss": 0.007722388836555183,
"step": 845,
"total_loss": 0.00843456873717514,
"value_loss": 0.0071217986454485075,
"value_loss_search": 0.028109038909803985,
"value_loss_thought": 0.028865350570777083
},
{
"epoch": 5.07,
"grad_norm": 1.0101019251398367,
"learning_rate": 4.178171121984109e-06,
"loss": 0.0699,
"sft_loss": 0.008467405906412751,
"step": 850,
"total_loss": 0.008816118605045631,
"value_loss": 0.003487127016751401,
"value_loss_search": 0.013744208114383127,
"value_loss_thought": 0.014152807989376015
},
{
"epoch": 5.1,
"grad_norm": 0.653401555518481,
"learning_rate": 4.16521680296092e-06,
"loss": 0.067,
"sft_loss": 0.010541580687277018,
"step": 855,
"total_loss": 0.011391171138998856,
"value_loss": 0.008495904014353073,
"value_loss_search": 0.02280127693209124,
"value_loss_thought": 0.045165955742459116
},
{
"epoch": 5.13,
"grad_norm": 0.8573274418097468,
"learning_rate": 4.152181611421179e-06,
"loss": 0.0607,
"sft_loss": 0.00878625299083069,
"step": 860,
"total_loss": 0.009619847631120138,
"value_loss": 0.00833594629189065,
"value_loss_search": 0.02539439773918275,
"value_loss_thought": 0.04129317272327171
},
{
"epoch": 5.16,
"grad_norm": 0.6170985982380647,
"learning_rate": 4.139066180428846e-06,
"loss": 0.0655,
"sft_loss": 0.009596668492304162,
"step": 865,
"total_loss": 0.010120830915363399,
"value_loss": 0.005241623797928696,
"value_loss_search": 0.021597909021647866,
"value_loss_thought": 0.020335081457960768
},
{
"epoch": 5.19,
"grad_norm": 0.825156846628994,
"learning_rate": 4.125871146944771e-06,
"loss": 0.0695,
"sft_loss": 0.010449141904246062,
"step": 870,
"total_loss": 0.011130747148763475,
"value_loss": 0.006816053235161235,
"value_loss_search": 0.02648081009813268,
"value_loss_thought": 0.028047615623654563
},
{
"epoch": 5.22,
"grad_norm": 0.5819547422187984,
"learning_rate": 4.112597151795758e-06,
"loss": 0.063,
"sft_loss": 0.00804176195524633,
"step": 875,
"total_loss": 0.009046485570181062,
"value_loss": 0.010047235474598893,
"value_loss_search": 0.022064273892272012,
"value_loss_thought": 0.05831360963654788
},
{
"epoch": 5.25,
"grad_norm": 0.8024901403445527,
"learning_rate": 4.099244839643448e-06,
"loss": 0.0589,
"sft_loss": 0.007425288169179112,
"step": 880,
"total_loss": 0.007866452395560409,
"value_loss": 0.004411642082025935,
"value_loss_search": 0.01583855556901881,
"value_loss_thought": 0.019454581364698242
},
{
"epoch": 5.28,
"grad_norm": 0.6299816489940798,
"learning_rate": 4.085814858953001e-06,
"loss": 0.0707,
"sft_loss": 0.010274743323680013,
"step": 885,
"total_loss": 0.011220432494735633,
"value_loss": 0.009456892390926442,
"value_loss_search": 0.017595245018355854,
"value_loss_thought": 0.05805989508698985
},
{
"epoch": 5.31,
"grad_norm": 0.5775443078383485,
"learning_rate": 4.072307861961614e-06,
"loss": 0.0668,
"sft_loss": 0.007527518505230546,
"step": 890,
"total_loss": 0.008477710493883706,
"value_loss": 0.00950191973965957,
"value_loss_search": 0.03369882960868278,
"value_loss_thought": 0.042316528412220576
},
{
"epoch": 5.34,
"grad_norm": 0.7217258713196177,
"learning_rate": 4.058724504646834e-06,
"loss": 0.0695,
"sft_loss": 0.008143483952153474,
"step": 895,
"total_loss": 0.009240032660204633,
"value_loss": 0.01096548721779982,
"value_loss_search": 0.03710676123126859,
"value_loss_thought": 0.050617136721757564
},
{
"epoch": 5.37,
"grad_norm": 0.7509981121776955,
"learning_rate": 4.045065446694709e-06,
"loss": 0.0623,
"sft_loss": 0.009156511997571216,
"step": 900,
"total_loss": 0.009822533285591817,
"value_loss": 0.006660212647693698,
"value_loss_search": 0.02795750253162623,
"value_loss_thought": 0.02532419814169771
},
{
"epoch": 5.4,
"grad_norm": 0.5391534216473769,
"learning_rate": 4.031331351467744e-06,
"loss": 0.0693,
"sft_loss": 0.006840780581114814,
"step": 905,
"total_loss": 0.007346747693952693,
"value_loss": 0.005059671220953988,
"value_loss_search": 0.023152905100801036,
"value_loss_thought": 0.017324464485159296
},
{
"epoch": 5.43,
"grad_norm": 0.8478085328901763,
"learning_rate": 4.017522885972687e-06,
"loss": 0.066,
"sft_loss": 0.00748998821945861,
"step": 910,
"total_loss": 0.008190430112915693,
"value_loss": 0.007004418966835147,
"value_loss_search": 0.0241462381236488,
"value_loss_thought": 0.03188911370725691
},
{
"epoch": 5.46,
"grad_norm": 0.6273190350109188,
"learning_rate": 4.0036407208281335e-06,
"loss": 0.0642,
"sft_loss": 0.007753341854549944,
"step": 915,
"total_loss": 0.008310681724867663,
"value_loss": 0.0055733984515427435,
"value_loss_search": 0.017653270972687096,
"value_loss_thought": 0.026933916354755637
},
{
"epoch": 5.49,
"grad_norm": 0.6275397576143824,
"learning_rate": 3.989685530231958e-06,
"loss": 0.0723,
"sft_loss": 0.008013604581356049,
"step": 920,
"total_loss": 0.008786806087823607,
"value_loss": 0.007732015183682961,
"value_loss_search": 0.018130889449082588,
"value_loss_thought": 0.04372523128440662
},
{
"epoch": 5.52,
"grad_norm": 0.6557274591871389,
"learning_rate": 3.975657991928573e-06,
"loss": 0.0654,
"sft_loss": 0.007132729375734925,
"step": 925,
"total_loss": 0.0075831895907356285,
"value_loss": 0.004504602500765032,
"value_loss_search": 0.017856063829344748,
"value_loss_thought": 0.018180756335345903
},
{
"epoch": 5.55,
"grad_norm": 0.6502931359394508,
"learning_rate": 3.961558787176012e-06,
"loss": 0.0702,
"sft_loss": 0.0077712137601338325,
"step": 930,
"total_loss": 0.008426044349789663,
"value_loss": 0.006548305749731753,
"value_loss_search": 0.020701198827305235,
"value_loss_thought": 0.03168524749562494
},
{
"epoch": 5.58,
"grad_norm": 0.5593737214026034,
"learning_rate": 3.9473886007128424e-06,
"loss": 0.0674,
"sft_loss": 0.0070376997464336455,
"step": 935,
"total_loss": 0.007807116948492876,
"value_loss": 0.007694172377568975,
"value_loss_search": 0.02574841559246579,
"value_loss_thought": 0.03580496397080424
},
{
"epoch": 5.61,
"grad_norm": 0.5565142984447847,
"learning_rate": 3.933148120724913e-06,
"loss": 0.0695,
"sft_loss": 0.005548381910193712,
"step": 940,
"total_loss": 0.006512936933782498,
"value_loss": 0.009645550738008523,
"value_loss_search": 0.02102770657133988,
"value_loss_thought": 0.056136698765476466
},
{
"epoch": 5.64,
"grad_norm": 0.6653147056652764,
"learning_rate": 3.9188380388119325e-06,
"loss": 0.075,
"sft_loss": 0.009176643792307005,
"step": 945,
"total_loss": 0.010155323132971716,
"value_loss": 0.009786792815066291,
"value_loss_search": 0.02649424351284324,
"value_loss_thought": 0.05180009940095261
},
{
"epoch": 5.67,
"grad_norm": 0.4898734764580984,
"learning_rate": 3.904459049953877e-06,
"loss": 0.0661,
"sft_loss": 0.008050526608712971,
"step": 950,
"total_loss": 0.008603187052369777,
"value_loss": 0.005526604052067796,
"value_loss_search": 0.02066867913152919,
"value_loss_thought": 0.0235441530123353
},
{
"epoch": 5.7,
"grad_norm": 0.7610416619124152,
"learning_rate": 3.890011852477243e-06,
"loss": 0.0712,
"sft_loss": 0.00837269393960014,
"step": 955,
"total_loss": 0.009244194875520861,
"value_loss": 0.00871500901721447,
"value_loss_search": 0.026370803010149758,
"value_loss_thought": 0.04334926914202271
},
{
"epoch": 5.73,
"grad_norm": 0.7172771316565127,
"learning_rate": 3.875497148021129e-06,
"loss": 0.0726,
"sft_loss": 0.008401849202346056,
"step": 960,
"total_loss": 0.008891117146237092,
"value_loss": 0.004892679379145193,
"value_loss_search": 0.01661159728510029,
"value_loss_thought": 0.022529837636739103
},
{
"epoch": 5.76,
"grad_norm": 0.6480324222376628,
"learning_rate": 3.860915641503161e-06,
"loss": 0.0646,
"sft_loss": 0.00682629911461845,
"step": 965,
"total_loss": 0.007262193315084176,
"value_loss": 0.004358942487397144,
"value_loss_search": 0.01821862360557134,
"value_loss_thought": 0.016652916284147067
},
{
"epoch": 5.79,
"grad_norm": 0.6949624103946892,
"learning_rate": 3.84626804108526e-06,
"loss": 0.0719,
"sft_loss": 0.008461356349289417,
"step": 970,
"total_loss": 0.009027346382336533,
"value_loss": 0.005659900530326922,
"value_loss_search": 0.019068497527393903,
"value_loss_thought": 0.02621070666225478
},
{
"epoch": 5.82,
"grad_norm": 0.7622591624750499,
"learning_rate": 3.831555058139244e-06,
"loss": 0.0707,
"sft_loss": 0.007402687979629263,
"step": 975,
"total_loss": 0.00789324305359287,
"value_loss": 0.004905550461671737,
"value_loss_search": 0.013050667314735165,
"value_loss_thought": 0.02619373640912954
},
{
"epoch": 5.85,
"grad_norm": 0.6838250232406838,
"learning_rate": 3.8167774072122854e-06,
"loss": 0.0673,
"sft_loss": 0.008353895461186766,
"step": 980,
"total_loss": 0.00888050429666123,
"value_loss": 0.005266088167263661,
"value_loss_search": 0.01670047964719288,
"value_loss_thought": 0.025428225997166008
},
{
"epoch": 5.88,
"grad_norm": 0.9218213608030134,
"learning_rate": 3.8019358059922052e-06,
"loss": 0.0708,
"sft_loss": 0.006913194921799004,
"step": 985,
"total_loss": 0.007807648131026213,
"value_loss": 0.008944532042056608,
"value_loss_search": 0.02053129872363115,
"value_loss_thought": 0.05102495740206905
},
{
"epoch": 5.91,
"grad_norm": 0.7564910242846892,
"learning_rate": 3.7870309752726185e-06,
"loss": 0.064,
"sft_loss": 0.0055543248075991866,
"step": 990,
"total_loss": 0.00604638959850945,
"value_loss": 0.004920647482958884,
"value_loss_search": 0.01775021549135545,
"value_loss_thought": 0.021614964383616098
},
{
"epoch": 5.94,
"grad_norm": 0.7406424491645267,
"learning_rate": 3.772063638917931e-06,
"loss": 0.062,
"sft_loss": 0.007381244131829589,
"step": 995,
"total_loss": 0.00787693980830113,
"value_loss": 0.004956956215028186,
"value_loss_search": 0.0186556116294355,
"value_loss_thought": 0.02100003812029172
},
{
"epoch": 5.97,
"grad_norm": 0.8597100129196285,
"learning_rate": 3.75703452382818e-06,
"loss": 0.0686,
"sft_loss": 0.0067865438759326935,
"step": 1000,
"total_loss": 0.0074210830740980786,
"value_loss": 0.0063453914136630376,
"value_loss_search": 0.020665711158505927,
"value_loss_thought": 0.030097420751735625
},
{
"epoch": 6.0,
"grad_norm": 0.6387120370362654,
"learning_rate": 3.741944359903734e-06,
"loss": 0.0685,
"sft_loss": 0.007980260415934026,
"step": 1005,
"total_loss": 0.00841312516255357,
"value_loss": 0.00432864762956342,
"value_loss_search": 0.016856307840919273,
"value_loss_thought": 0.017772873218109452
},
{
"epoch": 6.03,
"grad_norm": 0.47299058750162554,
"learning_rate": 3.7267938800098454e-06,
"loss": 0.0455,
"sft_loss": 0.005139627197058872,
"step": 1010,
"total_loss": 0.005742018675823602,
"value_loss": 0.006023914470279124,
"value_loss_search": 0.014465152566535267,
"value_loss_thought": 0.03372616275910332
},
{
"epoch": 6.06,
"grad_norm": 0.5769250479404755,
"learning_rate": 3.7115838199410566e-06,
"loss": 0.0442,
"sft_loss": 0.004237775265937671,
"step": 1015,
"total_loss": 0.004536310447571168,
"value_loss": 0.002985352237874395,
"value_loss_search": 0.012497988520624403,
"value_loss_thought": 0.01138482937376466
},
{
"epoch": 6.09,
"grad_norm": 0.5775093614029521,
"learning_rate": 3.696314918385466e-06,
"loss": 0.0455,
"sft_loss": 0.004376189230242744,
"step": 1020,
"total_loss": 0.004725277596082833,
"value_loss": 0.0034908839002582683,
"value_loss_search": 0.012387420580819253,
"value_loss_thought": 0.015539650371397328
},
{
"epoch": 6.12,
"grad_norm": 0.5743014343738564,
"learning_rate": 3.680987916888855e-06,
"loss": 0.0464,
"sft_loss": 0.004472998692654074,
"step": 1025,
"total_loss": 0.004852331803667198,
"value_loss": 0.0037933312117729655,
"value_loss_search": 0.009115806162424179,
"value_loss_thought": 0.02123084324521187
},
{
"epoch": 6.15,
"grad_norm": 1.0135045723056435,
"learning_rate": 3.6656035598186717e-06,
"loss": 0.0483,
"sft_loss": 0.006532586639514193,
"step": 1030,
"total_loss": 0.007033583752149753,
"value_loss": 0.005009971209847208,
"value_loss_search": 0.01218861587973379,
"value_loss_thought": 0.02789115408404541
},
{
"epoch": 6.18,
"grad_norm": 0.8265173915326937,
"learning_rate": 3.650162594327881e-06,
"loss": 0.0464,
"sft_loss": 0.005671659158542753,
"step": 1035,
"total_loss": 0.006245543843522228,
"value_loss": 0.005738847342217923,
"value_loss_search": 0.015518051942945022,
"value_loss_thought": 0.030392726854552167
},
{
"epoch": 6.21,
"grad_norm": 0.47962371612527804,
"learning_rate": 3.634665770318678e-06,
"loss": 0.0413,
"sft_loss": 0.005972391797695309,
"step": 1040,
"total_loss": 0.006403001316243717,
"value_loss": 0.004306095417541655,
"value_loss_search": 0.016831279399025335,
"value_loss_thought": 0.017617484057427645
},
{
"epoch": 6.24,
"grad_norm": 1.0086948671095952,
"learning_rate": 3.619113840406071e-06,
"loss": 0.0491,
"sft_loss": 0.004732140112901106,
"step": 1045,
"total_loss": 0.005075448810612215,
"value_loss": 0.003433087062830964,
"value_loss_search": 0.009895986284152513,
"value_loss_thought": 0.01756871009142742
},
{
"epoch": 6.27,
"grad_norm": 0.6224166386357451,
"learning_rate": 3.6035075598813275e-06,
"loss": 0.0508,
"sft_loss": 0.005647319235140458,
"step": 1050,
"total_loss": 0.005981965384165733,
"value_loss": 0.0033464612621173727,
"value_loss_search": 0.014234105288312548,
"value_loss_thought": 0.012537584816254822
},
{
"epoch": 6.3,
"grad_norm": 0.6803707206328181,
"learning_rate": 3.587847686675293e-06,
"loss": 0.0444,
"sft_loss": 0.004154384031426162,
"step": 1055,
"total_loss": 0.0045537911174491795,
"value_loss": 0.0039940711957683565,
"value_loss_search": 0.012944002056576665,
"value_loss_thought": 0.019008567334594773
},
{
"epoch": 6.33,
"grad_norm": 0.6488360833237377,
"learning_rate": 3.572134981321582e-06,
"loss": 0.0464,
"sft_loss": 0.00495091185439378,
"step": 1060,
"total_loss": 0.005601490682847654,
"value_loss": 0.006505788123034506,
"value_loss_search": 0.00773236445022576,
"value_loss_thought": 0.04431394169132545
},
{
"epoch": 6.36,
"grad_norm": 0.470284848811417,
"learning_rate": 3.556370206919643e-06,
"loss": 0.0412,
"sft_loss": 0.006972516793757677,
"step": 1065,
"total_loss": 0.007320523636008147,
"value_loss": 0.0034800680246689806,
"value_loss_search": 0.015306914580980902,
"value_loss_thought": 0.012533629250856392
},
{
"epoch": 6.39,
"grad_norm": 0.696033260327911,
"learning_rate": 3.5405541290976968e-06,
"loss": 0.0411,
"sft_loss": 0.004306224733591079,
"step": 1070,
"total_loss": 0.004650098075256892,
"value_loss": 0.003438732988161064,
"value_loss_search": 0.013183290196491271,
"value_loss_thought": 0.014326573800536835
},
{
"epoch": 6.42,
"grad_norm": 0.8518349953519152,
"learning_rate": 3.5246875159755554e-06,
"loss": 0.0393,
"sft_loss": 0.004596246278379112,
"step": 1075,
"total_loss": 0.005092797088298085,
"value_loss": 0.004965507755696308,
"value_loss_search": 0.013638158090543584,
"value_loss_thought": 0.026085903684634103
},
{
"epoch": 6.45,
"grad_norm": 0.7830604366139895,
"learning_rate": 3.5087711381273144e-06,
"loss": 0.0483,
"sft_loss": 0.004399802925763652,
"step": 1080,
"total_loss": 0.00474689214189965,
"value_loss": 0.0034708920188450064,
"value_loss_search": 0.014559524009473534,
"value_loss_thought": 0.013207611872235247
},
{
"epoch": 6.48,
"grad_norm": 1.0245098727939808,
"learning_rate": 3.49280576854393e-06,
"loss": 0.0453,
"sft_loss": 0.006848539051134139,
"step": 1085,
"total_loss": 0.007486119516067901,
"value_loss": 0.0063758047096257545,
"value_loss_search": 0.01603142107730946,
"value_loss_thought": 0.034975016615135246
},
{
"epoch": 6.51,
"grad_norm": 0.6521674554950044,
"learning_rate": 3.4767921825956824e-06,
"loss": 0.0487,
"sft_loss": 0.004285465716384352,
"step": 1090,
"total_loss": 0.004569660496787265,
"value_loss": 0.0028419478604973848,
"value_loss_search": 0.008792001151141449,
"value_loss_thought": 0.013943581694456952
},
{
"epoch": 6.53,
"grad_norm": 0.7150319458252575,
"learning_rate": 3.4607311579945124e-06,
"loss": 0.0506,
"sft_loss": 0.006909280724357814,
"step": 1095,
"total_loss": 0.007147605080501762,
"value_loss": 0.0023832433014376875,
"value_loss_search": 0.010291470044728612,
"value_loss_thought": 0.008774476365078953
},
{
"epoch": 6.56,
"grad_norm": 0.9363000353600848,
"learning_rate": 3.444623474756258e-06,
"loss": 0.0527,
"sft_loss": 0.003712919045938179,
"step": 1100,
"total_loss": 0.004117744177119675,
"value_loss": 0.004048251279891701,
"value_loss_search": 0.012587877828525506,
"value_loss_thought": 0.01979813240959629
},
{
"epoch": 6.59,
"grad_norm": 0.5759000271014428,
"learning_rate": 3.4284699151627672e-06,
"loss": 0.0463,
"sft_loss": 0.005106915923533961,
"step": 1105,
"total_loss": 0.0054412948647041045,
"value_loss": 0.0033437895152928832,
"value_loss_search": 0.01137218730814311,
"value_loss_thought": 0.015378128899897092
},
{
"epoch": 6.62,
"grad_norm": 0.7357711398285077,
"learning_rate": 3.412271263723909e-06,
"loss": 0.0434,
"sft_loss": 0.005000182124786079,
"step": 1110,
"total_loss": 0.005477192047459311,
"value_loss": 0.004770098890833197,
"value_loss_search": 0.012354543731896683,
"value_loss_thought": 0.025806247426635308
},
{
"epoch": 6.65,
"grad_norm": 0.8938685745921593,
"learning_rate": 3.3960283071394717e-06,
"loss": 0.0469,
"sft_loss": 0.0063235011184588075,
"step": 1115,
"total_loss": 0.006910537980154174,
"value_loss": 0.005870367820580214,
"value_loss_search": 0.014325453480932993,
"value_loss_thought": 0.032637489220542194
},
{
"epoch": 6.68,
"grad_norm": 0.6909317413670801,
"learning_rate": 3.3797418342609577e-06,
"loss": 0.047,
"sft_loss": 0.004918072844156995,
"step": 1120,
"total_loss": 0.0052730022313710375,
"value_loss": 0.0035492941345751207,
"value_loss_search": 0.010046540721191377,
"value_loss_thought": 0.018347811973399075
},
{
"epoch": 6.71,
"grad_norm": 0.5732090332805206,
"learning_rate": 3.3634126360532694e-06,
"loss": 0.0468,
"sft_loss": 0.005004867579555139,
"step": 1125,
"total_loss": 0.005580213024023806,
"value_loss": 0.005753454113050793,
"value_loss_search": 0.0135478620575046,
"value_loss_thought": 0.03247977066948806
},
{
"epoch": 6.74,
"grad_norm": 0.5934033812484811,
"learning_rate": 3.347041505556298e-06,
"loss": 0.0463,
"sft_loss": 0.005900320567889139,
"step": 1130,
"total_loss": 0.006214558424846928,
"value_loss": 0.0031423781176272312,
"value_loss_search": 0.012277730915150187,
"value_loss_thought": 0.01286129405855263
},
{
"epoch": 6.77,
"grad_norm": 0.6696886862461858,
"learning_rate": 3.3306292378464083e-06,
"loss": 0.0508,
"sft_loss": 0.007460485817864538,
"step": 1135,
"total_loss": 0.007751618086540191,
"value_loss": 0.0029113226871686493,
"value_loss_search": 0.01191347677657859,
"value_loss_thought": 0.011377104656867231
},
{
"epoch": 6.8,
"grad_norm": 0.7790037685619419,
"learning_rate": 3.314176629997825e-06,
"loss": 0.0452,
"sft_loss": 0.004827470483724028,
"step": 1140,
"total_loss": 0.005163270902585282,
"value_loss": 0.003358004500989864,
"value_loss_search": 0.013757221815399134,
"value_loss_thought": 0.013106814269212919
},
{
"epoch": 6.83,
"grad_norm": 0.5505345044218094,
"learning_rate": 3.297684481043922e-06,
"loss": 0.0469,
"sft_loss": 0.0062236432102508845,
"step": 1145,
"total_loss": 0.006526504096120789,
"value_loss": 0.003028608957868073,
"value_loss_search": 0.012845393837199025,
"value_loss_thought": 0.011383477907293127
},
{
"epoch": 6.86,
"grad_norm": 0.46343408162242566,
"learning_rate": 3.281153591938418e-06,
"loss": 0.0433,
"sft_loss": 0.005084162973798811,
"step": 1150,
"total_loss": 0.005412001899304642,
"value_loss": 0.003278389718082053,
"value_loss_search": 0.012043231101108632,
"value_loss_thought": 0.01418388647671236
},
{
"epoch": 6.89,
"grad_norm": 0.5728946583207882,
"learning_rate": 3.264584765516474e-06,
"loss": 0.0513,
"sft_loss": 0.008054213871946558,
"step": 1155,
"total_loss": 0.008398819074250241,
"value_loss": 0.0034460521280379906,
"value_loss_search": 0.013998744965681454,
"value_loss_thought": 0.01356967230240116
},
{
"epoch": 6.92,
"grad_norm": 0.5867841886877614,
"learning_rate": 3.2479788064557084e-06,
"loss": 0.0424,
"sft_loss": 0.004617373802466318,
"step": 1160,
"total_loss": 0.004921634896254546,
"value_loss": 0.003042611040564225,
"value_loss_search": 0.013083630732762686,
"value_loss_thought": 0.01125725753390725
},
{
"epoch": 6.95,
"grad_norm": 0.5749765325052467,
"learning_rate": 3.231336521237113e-06,
"loss": 0.0425,
"sft_loss": 0.005324905528686941,
"step": 1165,
"total_loss": 0.0056936069204539305,
"value_loss": 0.003687014164006541,
"value_loss_search": 0.01334903096231983,
"value_loss_thought": 0.016147082374664022
},
{
"epoch": 6.98,
"grad_norm": 0.5228964319711119,
"learning_rate": 3.2146587181058858e-06,
"loss": 0.049,
"sft_loss": 0.003777366707799956,
"step": 1170,
"total_loss": 0.004077984397645196,
"value_loss": 0.0030061770619795427,
"value_loss_search": 0.011763368438232646,
"value_loss_thought": 0.012286048040141394
},
{
"epoch": 7.01,
"grad_norm": 0.40592168178993326,
"learning_rate": 3.1979462070321817e-06,
"loss": 0.0374,
"sft_loss": 0.005217826526495628,
"step": 1175,
"total_loss": 0.005588610990344023,
"value_loss": 0.003707844631036039,
"value_loss_search": 0.01431789886352135,
"value_loss_thought": 0.015344858078242396
},
{
"epoch": 7.04,
"grad_norm": 0.5057051615052391,
"learning_rate": 3.1811997996717716e-06,
"loss": 0.0303,
"sft_loss": 0.0033945336355827747,
"step": 1180,
"total_loss": 0.0038138232659576943,
"value_loss": 0.004192896059157647,
"value_loss_search": 0.006568620411331949,
"value_loss_thought": 0.02697454801736967
},
{
"epoch": 7.07,
"grad_norm": 0.751983057399965,
"learning_rate": 3.1644203093266257e-06,
"loss": 0.0311,
"sft_loss": 0.0022496749937999994,
"step": 1185,
"total_loss": 0.0026717950843249128,
"value_loss": 0.004221200832080285,
"value_loss_search": 0.008355579411721692,
"value_loss_thought": 0.025414026997623296
},
{
"epoch": 7.1,
"grad_norm": 0.48042996735947746,
"learning_rate": 3.147608550905415e-06,
"loss": 0.0303,
"sft_loss": 0.0035940095724072306,
"step": 1190,
"total_loss": 0.0038380636684223644,
"value_loss": 0.002440540775091904,
"value_loss_search": 0.008547667160826222,
"value_loss_thought": 0.01097665907091141
},
{
"epoch": 7.13,
"grad_norm": 0.38813805754778213,
"learning_rate": 3.1307653408839316e-06,
"loss": 0.0314,
"sft_loss": 0.0033699018502375113,
"step": 1195,
"total_loss": 0.003613495991919535,
"value_loss": 0.002435941319390622,
"value_loss_search": 0.009487671555439193,
"value_loss_thought": 0.009999858911305637
},
{
"epoch": 7.16,
"grad_norm": 0.500612598170884,
"learning_rate": 3.1138914972654423e-06,
"loss": 0.0328,
"sft_loss": 0.0036761065653990953,
"step": 1200,
"total_loss": 0.00389666182005044,
"value_loss": 0.0022055522495065816,
"value_loss_search": 0.010129135770739595,
"value_loss_thought": 0.0075152823273128885
},
{
"epoch": 7.19,
"grad_norm": 0.6490281681098725,
"learning_rate": 3.0969878395409536e-06,
"loss": 0.0253,
"sft_loss": 0.003135368030052632,
"step": 1205,
"total_loss": 0.0033622780613200122,
"value_loss": 0.002269100064461327,
"value_loss_search": 0.009437274075094138,
"value_loss_thought": 0.00871552659527879
},
{
"epoch": 7.22,
"grad_norm": 0.4935810834710449,
"learning_rate": 3.08005518864942e-06,
"loss": 0.0306,
"sft_loss": 0.0035176657198462634,
"step": 1210,
"total_loss": 0.0037368611569259967,
"value_loss": 0.0021919542997125064,
"value_loss_search": 0.008411810171332945,
"value_loss_thought": 0.009123824179141593
},
{
"epoch": 7.25,
"grad_norm": 0.5321452984531243,
"learning_rate": 3.06309436693787e-06,
"loss": 0.0298,
"sft_loss": 0.0036181122821290048,
"step": 1215,
"total_loss": 0.003920620708220213,
"value_loss": 0.0030250843786689074,
"value_loss_search": 0.012618535867022728,
"value_loss_thought": 0.011582138900485007
},
{
"epoch": 7.28,
"grad_norm": 0.5041384981332757,
"learning_rate": 3.0461061981214685e-06,
"loss": 0.029,
"sft_loss": 0.0037700470944400876,
"step": 1220,
"total_loss": 0.003952616032614742,
"value_loss": 0.001825689259169394,
"value_loss_search": 0.007131004981476963,
"value_loss_thought": 0.0074745089516000006
},
{
"epoch": 7.31,
"grad_norm": 0.825130289977903,
"learning_rate": 3.029091507243514e-06,
"loss": 0.0337,
"sft_loss": 0.00417679272359237,
"step": 1225,
"total_loss": 0.004465274805897934,
"value_loss": 0.0028848204653513674,
"value_loss_search": 0.012175960628178472,
"value_loss_thought": 0.010902603188787907
},
{
"epoch": 7.34,
"grad_norm": 0.48612582380031194,
"learning_rate": 3.0120511206353692e-06,
"loss": 0.0306,
"sft_loss": 0.0037190442701103164,
"step": 1230,
"total_loss": 0.003983076896520288,
"value_loss": 0.0026403263425891057,
"value_loss_search": 0.009736456504344914,
"value_loss_thought": 0.011386154282945427
},
{
"epoch": 7.37,
"grad_norm": 0.49557711764797474,
"learning_rate": 2.9949858658763297e-06,
"loss": 0.0293,
"sft_loss": 0.0033108420844655483,
"step": 1235,
"total_loss": 0.003569280348868631,
"value_loss": 0.002584382791928874,
"value_loss_search": 0.009431568763068299,
"value_loss_thought": 0.011243493664233028
},
{
"epoch": 7.4,
"grad_norm": 0.5415430915788131,
"learning_rate": 2.9778965717534314e-06,
"loss": 0.0323,
"sft_loss": 0.0030432559084147214,
"step": 1240,
"total_loss": 0.003313265562081824,
"value_loss": 0.002700096501041571,
"value_loss_search": 0.006881270110670812,
"value_loss_thought": 0.014719502057914724
},
{
"epoch": 7.43,
"grad_norm": 0.708236640225555,
"learning_rate": 2.9607840682211987e-06,
"loss": 0.0315,
"sft_loss": 0.003214700281387195,
"step": 1245,
"total_loss": 0.003709355751084331,
"value_loss": 0.004946554816625337,
"value_loss_search": 0.006554803127994546,
"value_loss_thought": 0.03301763468744454
},
{
"epoch": 7.46,
"grad_norm": 0.5437059054594167,
"learning_rate": 2.9436491863613404e-06,
"loss": 0.0316,
"sft_loss": 0.0036181325966026636,
"step": 1250,
"total_loss": 0.003912387714774468,
"value_loss": 0.0029425512826605884,
"value_loss_search": 0.01317626194404511,
"value_loss_thought": 0.01036414824043277
},
{
"epoch": 7.49,
"grad_norm": 0.6432414654067561,
"learning_rate": 2.9264927583423847e-06,
"loss": 0.0306,
"sft_loss": 0.003098224982386455,
"step": 1255,
"total_loss": 0.003732235044594745,
"value_loss": 0.006340100448505836,
"value_loss_search": 0.010029617098223299,
"value_loss_thought": 0.040691186868843945
},
{
"epoch": 7.52,
"grad_norm": 0.7784154218893079,
"learning_rate": 2.9093156173792675e-06,
"loss": 0.0329,
"sft_loss": 0.003491103381384164,
"step": 1260,
"total_loss": 0.003718133881397989,
"value_loss": 0.002270305072852352,
"value_loss_search": 0.010139237881389818,
"value_loss_thought": 0.008023202697256693
},
{
"epoch": 7.55,
"grad_norm": 0.5051189878209226,
"learning_rate": 2.8921185976928613e-06,
"loss": 0.0299,
"sft_loss": 0.0037886684003751725,
"step": 1265,
"total_loss": 0.00400653738573169,
"value_loss": 0.0021786899614198775,
"value_loss_search": 0.00914538588888263,
"value_loss_thought": 0.008284133870392906
},
{
"epoch": 7.58,
"grad_norm": 0.6290655324172468,
"learning_rate": 2.8749025344694653e-06,
"loss": 0.0336,
"sft_loss": 0.004462181986309588,
"step": 1270,
"total_loss": 0.004616037011123808,
"value_loss": 0.00153855009958761,
"value_loss_search": 0.005969772761523018,
"value_loss_thought": 0.006338628097728361
},
{
"epoch": 7.61,
"grad_norm": 0.5333504288117544,
"learning_rate": 2.857668263820244e-06,
"loss": 0.0303,
"sft_loss": 0.003401619120268151,
"step": 1275,
"total_loss": 0.003593827542800909,
"value_loss": 0.0019220843075743233,
"value_loss_search": 0.008753876088519519,
"value_loss_thought": 0.006622798505304672
},
{
"epoch": 7.64,
"grad_norm": 0.4126781957882197,
"learning_rate": 2.840416622740617e-06,
"loss": 0.0295,
"sft_loss": 0.004203358304221183,
"step": 1280,
"total_loss": 0.0044454284535731855,
"value_loss": 0.0024207017429930034,
"value_loss_search": 0.00895141239620898,
"value_loss_thought": 0.010414201496178065
},
{
"epoch": 7.67,
"grad_norm": 0.636868430333384,
"learning_rate": 2.823148449069613e-06,
"loss": 0.0317,
"sft_loss": 0.0037902468640822915,
"step": 1285,
"total_loss": 0.004080002412759143,
"value_loss": 0.002897555428683063,
"value_loss_search": 0.010850950920882951,
"value_loss_thought": 0.012329492640174067
},
{
"epoch": 7.7,
"grad_norm": 0.49524669578109337,
"learning_rate": 2.8058645814491784e-06,
"loss": 0.0312,
"sft_loss": 0.004466524376766756,
"step": 1290,
"total_loss": 0.00469799009814551,
"value_loss": 0.002314657226224881,
"value_loss_search": 0.006950648547240234,
"value_loss_thought": 0.011566609143937968
},
{
"epoch": 7.73,
"grad_norm": 0.4873869632029965,
"learning_rate": 2.7885658592834488e-06,
"loss": 0.032,
"sft_loss": 0.004330064181704074,
"step": 1295,
"total_loss": 0.0045417543143230436,
"value_loss": 0.0021169017202510077,
"value_loss_search": 0.00826698833628825,
"value_loss_thought": 0.00866822535913343
},
{
"epoch": 7.76,
"grad_norm": 0.5038272789213597,
"learning_rate": 2.771253122697981e-06,
"loss": 0.0331,
"sft_loss": 0.0041820299404207615,
"step": 1300,
"total_loss": 0.004449167268626297,
"value_loss": 0.002671373126996457,
"value_loss_search": 0.008917606517729836,
"value_loss_thought": 0.012453378505870204
},
{
"epoch": 7.79,
"grad_norm": 0.5716128545581782,
"learning_rate": 2.7539272124989545e-06,
"loss": 0.0327,
"sft_loss": 0.0034668007108848544,
"step": 1305,
"total_loss": 0.0037846647005551405,
"value_loss": 0.0031786399593784153,
"value_loss_search": 0.012202327424117244,
"value_loss_thought": 0.013226792128807573
},
{
"epoch": 7.82,
"grad_norm": 0.7312620161870373,
"learning_rate": 2.736588970132333e-06,
"loss": 0.032,
"sft_loss": 0.0036935678450390696,
"step": 1310,
"total_loss": 0.003966284790448071,
"value_loss": 0.0027271693567854525,
"value_loss_search": 0.011858981241562105,
"value_loss_thought": 0.00995837363161627
},
{
"epoch": 7.85,
"grad_norm": 0.48258120287180617,
"learning_rate": 2.7192392376430014e-06,
"loss": 0.0313,
"sft_loss": 0.003437778353691101,
"step": 1315,
"total_loss": 0.0036498856757901875,
"value_loss": 0.0021210731328892506,
"value_loss_search": 0.008885550579176994,
"value_loss_thought": 0.008083034464357297
},
{
"epoch": 7.88,
"grad_norm": 0.6553771008550281,
"learning_rate": 2.701878857633874e-06,
"loss": 0.0328,
"sft_loss": 0.002638998458860442,
"step": 1320,
"total_loss": 0.002854476947356943,
"value_loss": 0.0021547851526747762,
"value_loss_search": 0.006954838147055398,
"value_loss_thought": 0.010283443070898101
},
{
"epoch": 7.91,
"grad_norm": 0.5831125961756038,
"learning_rate": 2.684508673224967e-06,
"loss": 0.0348,
"sft_loss": 0.004423308192053809,
"step": 1325,
"total_loss": 0.004638882860058402,
"value_loss": 0.002155746738399955,
"value_loss_search": 0.009081033444783771,
"value_loss_thought": 0.008164940534516062
},
{
"epoch": 7.94,
"grad_norm": 0.6554790409450288,
"learning_rate": 2.6671295280124567e-06,
"loss": 0.0322,
"sft_loss": 0.003197679913137108,
"step": 1330,
"total_loss": 0.0033951037816166265,
"value_loss": 0.001974238623790825,
"value_loss_search": 0.007716037955879074,
"value_loss_thought": 0.008077871069599497
},
{
"epoch": 7.97,
"grad_norm": 0.6464974512156216,
"learning_rate": 2.649742266027705e-06,
"loss": 0.0309,
"sft_loss": 0.0025466441409662368,
"step": 1335,
"total_loss": 0.002726721732233273,
"value_loss": 0.001800775762740159,
"value_loss_search": 0.006972815421841005,
"value_loss_thought": 0.007433390758114911
},
{
"epoch": 8.0,
"grad_norm": 0.6803735749099431,
"learning_rate": 2.632347731696274e-06,
"loss": 0.033,
"sft_loss": 0.003279316209955141,
"step": 1340,
"total_loss": 0.0035762524097776804,
"value_loss": 0.002969362228782302,
"value_loss_search": 0.008045078085967817,
"value_loss_thought": 0.0157098194164746
},
{
"epoch": 8.03,
"grad_norm": 0.39085071585625647,
"learning_rate": 2.6149467697969118e-06,
"loss": 0.0225,
"sft_loss": 0.002843447361374274,
"step": 1345,
"total_loss": 0.002981356151798309,
"value_loss": 0.001379087858413186,
"value_loss_search": 0.004844959436161389,
"value_loss_thought": 0.006187743457485339
},
{
"epoch": 8.06,
"grad_norm": 0.48063989373141985,
"learning_rate": 2.597540225420525e-06,
"loss": 0.0226,
"sft_loss": 0.002574404375627637,
"step": 1350,
"total_loss": 0.0028025899320653024,
"value_loss": 0.0022818553584329493,
"value_loss_search": 0.005695947931963019,
"value_loss_thought": 0.012558894986273116
},
{
"epoch": 8.09,
"grad_norm": 0.4206131958945122,
"learning_rate": 2.580128943929139e-06,
"loss": 0.0212,
"sft_loss": 0.002781625863281079,
"step": 1355,
"total_loss": 0.002963343672460894,
"value_loss": 0.0018171783105231042,
"value_loss_search": 0.008560673631276928,
"value_loss_thought": 0.005976753031427506
},
{
"epoch": 8.12,
"grad_norm": 0.3653474778372023,
"learning_rate": 2.5627137709148386e-06,
"loss": 0.0225,
"sft_loss": 0.0017891598748974503,
"step": 1360,
"total_loss": 0.001957193014266068,
"value_loss": 0.0016803315540641962,
"value_loss_search": 0.006167404261202591,
"value_loss_thought": 0.007275248032328818
},
{
"epoch": 8.15,
"grad_norm": 0.43951845600689593,
"learning_rate": 2.5452955521587064e-06,
"loss": 0.0225,
"sft_loss": 0.0016710011375835165,
"step": 1365,
"total_loss": 0.0018185687295726894,
"value_loss": 0.0014756758409930626,
"value_loss_search": 0.006119606431695956,
"value_loss_thought": 0.005685800284209108
},
{
"epoch": 8.18,
"grad_norm": 0.45317276348714697,
"learning_rate": 2.5278751335897423e-06,
"loss": 0.0238,
"sft_loss": 0.0029196401010267437,
"step": 1370,
"total_loss": 0.0030648296158460654,
"value_loss": 0.0014518950408728415,
"value_loss_search": 0.0060932497550993505,
"value_loss_thought": 0.005521910469178692
},
{
"epoch": 8.21,
"grad_norm": 0.3857977796232688,
"learning_rate": 2.5104533612437816e-06,
"loss": 0.0203,
"sft_loss": 0.002657680620905012,
"step": 1375,
"total_loss": 0.0027829522318313592,
"value_loss": 0.00125271600504675,
"value_loss_search": 0.004111475246963892,
"value_loss_thought": 0.005910252820331152
},
{
"epoch": 8.24,
"grad_norm": 0.43983223503478724,
"learning_rate": 2.493031081222406e-06,
"loss": 0.0214,
"sft_loss": 0.002025950566167012,
"step": 1380,
"total_loss": 0.0021369275005781673,
"value_loss": 0.0011097693309352508,
"value_loss_search": 0.003321273262153568,
"value_loss_thought": 0.005556881445545514
},
{
"epoch": 8.27,
"grad_norm": 0.3626207865464384,
"learning_rate": 2.475609139651855e-06,
"loss": 0.0208,
"sft_loss": 0.002262994254124351,
"step": 1385,
"total_loss": 0.0023927846590680703,
"value_loss": 0.0012979039850506524,
"value_loss_search": 0.005410684119317466,
"value_loss_thought": 0.004972547819033934
},
{
"epoch": 8.3,
"grad_norm": 0.5204196278274228,
"learning_rate": 2.4581883826419294e-06,
"loss": 0.0238,
"sft_loss": 0.0023898789659142494,
"step": 1390,
"total_loss": 0.0025361195974028306,
"value_loss": 0.0014624062704569952,
"value_loss_search": 0.0058827654516449,
"value_loss_thought": 0.005816484717388448
},
{
"epoch": 8.33,
"grad_norm": 0.45142759026857865,
"learning_rate": 2.4407696562449006e-06,
"loss": 0.0209,
"sft_loss": 0.0020229626476066186,
"step": 1395,
"total_loss": 0.0021993215879206216,
"value_loss": 0.0017635896525462157,
"value_loss_search": 0.006506465665435712,
"value_loss_thought": 0.007602251682465066
},
{
"epoch": 8.36,
"grad_norm": 0.6201398131026475,
"learning_rate": 2.4233538064144226e-06,
"loss": 0.0214,
"sft_loss": 0.002320754388347268,
"step": 1400,
"total_loss": 0.0024575029708927333,
"value_loss": 0.0013674858636591124,
"value_loss_search": 0.004308351113706976,
"value_loss_thought": 0.006631535803444421
},
{
"epoch": 8.38,
"grad_norm": 0.45220431536451633,
"learning_rate": 2.4059416789644473e-06,
"loss": 0.0224,
"sft_loss": 0.0025798780581681056,
"step": 1405,
"total_loss": 0.00268079744263332,
"value_loss": 0.0010091937743709422,
"value_loss_search": 0.004291023600296739,
"value_loss_thought": 0.0037825266010713676
},
{
"epoch": 8.41,
"grad_norm": 0.45477594831619567,
"learning_rate": 2.388534119528145e-06,
"loss": 0.0195,
"sft_loss": 0.001965572632616386,
"step": 1410,
"total_loss": 0.0021669458707378906,
"value_loss": 0.002013732181683281,
"value_loss_search": 0.006261610782757998,
"value_loss_thought": 0.009848246640262914
},
{
"epoch": 8.44,
"grad_norm": 0.4218999322836366,
"learning_rate": 2.3711319735168378e-06,
"loss": 0.0219,
"sft_loss": 0.002767064847284928,
"step": 1415,
"total_loss": 0.0029677543869524926,
"value_loss": 0.0020068954641317303,
"value_loss_search": 0.006902593411530234,
"value_loss_thought": 0.00915257024798848
},
{
"epoch": 8.47,
"grad_norm": 0.39469190101455454,
"learning_rate": 2.353736086078941e-06,
"loss": 0.0219,
"sft_loss": 0.00205565721844323,
"step": 1420,
"total_loss": 0.0023155218750730453,
"value_loss": 0.0025986466305312206,
"value_loss_search": 0.009201484014715789,
"value_loss_thought": 0.011587689043972204
},
{
"epoch": 8.5,
"grad_norm": 0.3222692938718468,
"learning_rate": 2.336347302058916e-06,
"loss": 0.0231,
"sft_loss": 0.0034793566446751356,
"step": 1425,
"total_loss": 0.00360437715615376,
"value_loss": 0.0012502055355525954,
"value_loss_search": 0.004974942305875629,
"value_loss_thought": 0.005026701947775792
},
{
"epoch": 8.53,
"grad_norm": 0.4599366779305295,
"learning_rate": 2.3189664659562442e-06,
"loss": 0.024,
"sft_loss": 0.002877801636350341,
"step": 1430,
"total_loss": 0.003031013450221565,
"value_loss": 0.0015321182537206823,
"value_loss_search": 0.00576030847768152,
"value_loss_thought": 0.006496637430245755
},
{
"epoch": 8.56,
"grad_norm": 0.4315667432642988,
"learning_rate": 2.3015944218844063e-06,
"loss": 0.022,
"sft_loss": 0.002819139277562499,
"step": 1435,
"total_loss": 0.002962993244301515,
"value_loss": 0.0014385397402392642,
"value_loss_search": 0.005644433948407368,
"value_loss_thought": 0.005863883913934842
},
{
"epoch": 8.59,
"grad_norm": 0.4745674858393937,
"learning_rate": 2.2842320135298946e-06,
"loss": 0.0229,
"sft_loss": 0.002344584878301248,
"step": 1440,
"total_loss": 0.0024817303616316622,
"value_loss": 0.0013714549205573689,
"value_loss_search": 0.006260404985391687,
"value_loss_thought": 0.004711234440060252
},
{
"epoch": 8.62,
"grad_norm": 0.40694918242518996,
"learning_rate": 2.2668800841112345e-06,
"loss": 0.0229,
"sft_loss": 0.00296173918468412,
"step": 1445,
"total_loss": 0.0030806214503627414,
"value_loss": 0.0011888226746123109,
"value_loss_search": 0.004837690460476551,
"value_loss_thought": 0.004672890894835291
},
{
"epoch": 8.65,
"grad_norm": 0.4107733649493513,
"learning_rate": 2.2495394763380338e-06,
"loss": 0.0225,
"sft_loss": 0.003584610787220299,
"step": 1450,
"total_loss": 0.0038719090720064743,
"value_loss": 0.0028729828365271714,
"value_loss_search": 0.005230140845671372,
"value_loss_thought": 0.017753721810265688
},
{
"epoch": 8.68,
"grad_norm": 0.3728305802169862,
"learning_rate": 2.232211032370057e-06,
"loss": 0.0227,
"sft_loss": 0.0025901119457557797,
"step": 1455,
"total_loss": 0.0027724159214699284,
"value_loss": 0.0018230398026389595,
"value_loss_search": 0.006323750824310537,
"value_loss_thought": 0.008260567580418866
},
{
"epoch": 8.71,
"grad_norm": 0.43244392951704247,
"learning_rate": 2.2148955937763215e-06,
"loss": 0.0202,
"sft_loss": 0.0023802727228030562,
"step": 1460,
"total_loss": 0.0025262993830239113,
"value_loss": 0.0014602663856294385,
"value_loss_search": 0.004974742010833211,
"value_loss_thought": 0.006707389143184628
},
{
"epoch": 8.74,
"grad_norm": 0.446983159592719,
"learning_rate": 2.197594001494232e-06,
"loss": 0.0231,
"sft_loss": 0.00247747907997109,
"step": 1465,
"total_loss": 0.0028393455249869247,
"value_loss": 0.0036186647702152186,
"value_loss_search": 0.005167914255184769,
"value_loss_thought": 0.02378140405708109
},
{
"epoch": 8.77,
"grad_norm": 0.582681357981295,
"learning_rate": 2.1803070957887348e-06,
"loss": 0.0232,
"sft_loss": 0.0029129542876034976,
"step": 1470,
"total_loss": 0.0030655652646260022,
"value_loss": 0.0015261098120788574,
"value_loss_search": 0.006358572702845322,
"value_loss_thought": 0.005850305858075444
},
{
"epoch": 8.8,
"grad_norm": 0.4700551072376061,
"learning_rate": 2.1630357162115133e-06,
"loss": 0.0219,
"sft_loss": 0.002174633409595117,
"step": 1475,
"total_loss": 0.002347504053091143,
"value_loss": 0.0017287064572656164,
"value_loss_search": 0.008121828264177112,
"value_loss_thought": 0.005707823473858298
},
{
"epoch": 8.83,
"grad_norm": 0.7663496628024437,
"learning_rate": 2.1457807015602086e-06,
"loss": 0.0234,
"sft_loss": 0.0025546713673975318,
"step": 1480,
"total_loss": 0.00283914315147058,
"value_loss": 0.002844717770221905,
"value_loss_search": 0.011075520714871345,
"value_loss_thought": 0.011682221261048653
},
{
"epoch": 8.86,
"grad_norm": 0.3411989638023231,
"learning_rate": 2.1285428898376907e-06,
"loss": 0.0218,
"sft_loss": 0.0021655169257428497,
"step": 1485,
"total_loss": 0.0023130710998373162,
"value_loss": 0.0014755416389562015,
"value_loss_search": 0.005575540512140265,
"value_loss_thought": 0.006228792705860542
},
{
"epoch": 8.89,
"grad_norm": 0.4283075171896123,
"learning_rate": 2.1113231182113557e-06,
"loss": 0.0226,
"sft_loss": 0.002557673762203194,
"step": 1490,
"total_loss": 0.002811015537105277,
"value_loss": 0.002533417688255213,
"value_loss_search": 0.006894818281846682,
"value_loss_thought": 0.013372523540647307
},
{
"epoch": 8.92,
"grad_norm": 0.38025999762981544,
"learning_rate": 2.0941222229724683e-06,
"loss": 0.0195,
"sft_loss": 0.002423516203998588,
"step": 1495,
"total_loss": 0.0026445118043511686,
"value_loss": 0.002209956094793597,
"value_loss_search": 0.007773246503208498,
"value_loss_thought": 0.00990640217037111
},
{
"epoch": 8.95,
"grad_norm": 0.4107993684483595,
"learning_rate": 2.076941039495545e-06,
"loss": 0.023,
"sft_loss": 0.002852113952394575,
"step": 1500,
"total_loss": 0.003029771816866855,
"value_loss": 0.0017765785493793374,
"value_loss_search": 0.006161500808036635,
"value_loss_thought": 0.008051127563931004
},
{
"epoch": 8.98,
"grad_norm": 0.43206266155631295,
"learning_rate": 2.05978040219779e-06,
"loss": 0.0219,
"sft_loss": 0.0023770652449456977,
"step": 1505,
"total_loss": 0.002536764156579352,
"value_loss": 0.0015969893091551056,
"value_loss_search": 0.0054944734949231135,
"value_loss_thought": 0.007281441086252016
},
{
"epoch": 9.01,
"grad_norm": 0.374824172192575,
"learning_rate": 2.0426411444985622e-06,
"loss": 0.0212,
"sft_loss": 0.0025530525454087183,
"step": 1510,
"total_loss": 0.0026753786481890527,
"value_loss": 0.0012232610420596756,
"value_loss_search": 0.004044807156776642,
"value_loss_thought": 0.005741281189693836
},
{
"epoch": 9.04,
"grad_norm": 0.5103310551708304,
"learning_rate": 2.0255240987789077e-06,
"loss": 0.017,
"sft_loss": 0.002143319571041502,
"step": 1515,
"total_loss": 0.0022470162215711296,
"value_loss": 0.001036966439151854,
"value_loss_search": 0.004559061944007681,
"value_loss_thought": 0.0037366695483342484
},
{
"epoch": 9.07,
"grad_norm": 0.41407771047339026,
"learning_rate": 2.008430096341129e-06,
"loss": 0.0165,
"sft_loss": 0.00211839419498574,
"step": 1520,
"total_loss": 0.002260489938532828,
"value_loss": 0.0014209576240091337,
"value_loss_search": 0.006807764833172314,
"value_loss_thought": 0.004559896182763623
},
{
"epoch": 9.1,
"grad_norm": 0.3998377123999996,
"learning_rate": 1.991359967368416e-06,
"loss": 0.0167,
"sft_loss": 0.0017885153938550502,
"step": 1525,
"total_loss": 0.0019043610838139103,
"value_loss": 0.001158456964958532,
"value_loss_search": 0.004520953930921223,
"value_loss_thought": 0.004746701816475252
},
{
"epoch": 9.13,
"grad_norm": 0.25993528776663594,
"learning_rate": 1.974314540884522e-06,
"loss": 0.0168,
"sft_loss": 0.0022823128500021996,
"step": 1530,
"total_loss": 0.0023883844661270357,
"value_loss": 0.0010607163117128948,
"value_loss_search": 0.003205415800539413,
"value_loss_thought": 0.0052803147056920356
},
{
"epoch": 9.16,
"grad_norm": 0.28607830106364285,
"learning_rate": 1.9572946447135087e-06,
"loss": 0.017,
"sft_loss": 0.0020253795781172814,
"step": 1535,
"total_loss": 0.002180098254166296,
"value_loss": 0.0015471866376628896,
"value_loss_search": 0.005289423008980521,
"value_loss_thought": 0.007088070128247637
},
{
"epoch": 9.19,
"grad_norm": 0.36665251321589426,
"learning_rate": 1.9403011054395372e-06,
"loss": 0.0176,
"sft_loss": 0.0020406075345817953,
"step": 1540,
"total_loss": 0.00214596866593979,
"value_loss": 0.0010536111770306888,
"value_loss_search": 0.003708674301128667,
"value_loss_thought": 0.004720214986241445
},
{
"epoch": 9.22,
"grad_norm": 0.33701179906071765,
"learning_rate": 1.923334748366727e-06,
"loss": 0.0164,
"sft_loss": 0.0018277755152666941,
"step": 1545,
"total_loss": 0.001965286196420379,
"value_loss": 0.0013751067914199665,
"value_loss_search": 0.0051922910111670715,
"value_loss_thought": 0.005808563233449604
},
{
"epoch": 9.25,
"grad_norm": 0.30304308100827854,
"learning_rate": 1.9063963974790715e-06,
"loss": 0.0171,
"sft_loss": 0.0021657033037627118,
"step": 1550,
"total_loss": 0.0022887821434451894,
"value_loss": 0.0012307884190931873,
"value_loss_search": 0.004165462106902851,
"value_loss_thought": 0.005680845198685347
},
{
"epoch": 9.28,
"grad_norm": 0.348110330157348,
"learning_rate": 1.8894868754004247e-06,
"loss": 0.0168,
"sft_loss": 0.002240948341204785,
"step": 1555,
"total_loss": 0.002330947991924148,
"value_loss": 0.000899996584598739,
"value_loss_search": 0.0027729876618877826,
"value_loss_thought": 0.00442698502301937
},
{
"epoch": 9.31,
"grad_norm": 0.3510042973090616,
"learning_rate": 1.8726070033545468e-06,
"loss": 0.0176,
"sft_loss": 0.0018620806687977165,
"step": 1560,
"total_loss": 0.0019822285716736944,
"value_loss": 0.0012014789214504162,
"value_loss_search": 0.0045417374156954795,
"value_loss_thought": 0.005070093995891511
},
{
"epoch": 9.34,
"grad_norm": 0.2988369790384324,
"learning_rate": 1.855757601125221e-06,
"loss": 0.0168,
"sft_loss": 0.0019012396631296724,
"step": 1565,
"total_loss": 0.0020716833577807845,
"value_loss": 0.001704436970885581,
"value_loss_search": 0.006682152269536346,
"value_loss_thought": 0.006953343548593694
},
{
"epoch": 9.37,
"grad_norm": 0.4000228004615745,
"learning_rate": 1.8389394870164418e-06,
"loss": 0.0174,
"sft_loss": 0.0017132473614765332,
"step": 1570,
"total_loss": 0.0018083037684050395,
"value_loss": 0.0009505639430244627,
"value_loss_search": 0.004880918659534927,
"value_loss_thought": 0.0027235929035441587
},
{
"epoch": 9.4,
"grad_norm": 0.3335584079653667,
"learning_rate": 1.8221534778126712e-06,
"loss": 0.016,
"sft_loss": 0.0019671204237965865,
"step": 1575,
"total_loss": 0.0020535163486698595,
"value_loss": 0.0008639591912242394,
"value_loss_search": 0.0034627173671083256,
"value_loss_thought": 0.003448956180272944
},
{
"epoch": 9.43,
"grad_norm": 0.3026400110385097,
"learning_rate": 1.8054003887391727e-06,
"loss": 0.0166,
"sft_loss": 0.002014622194110416,
"step": 1580,
"total_loss": 0.0021511696702731344,
"value_loss": 0.001365474661497501,
"value_loss_search": 0.004294166664863042,
"value_loss_thought": 0.006629630598598624
},
{
"epoch": 9.46,
"grad_norm": 0.43139165457443246,
"learning_rate": 1.7886810334224192e-06,
"loss": 0.0163,
"sft_loss": 0.0021359879698138683,
"step": 1585,
"total_loss": 0.0022444051660613696,
"value_loss": 0.0010841718215033325,
"value_loss_search": 0.004590211787228782,
"value_loss_thought": 0.004083162726351475
},
{
"epoch": 9.49,
"grad_norm": 0.34009988215077,
"learning_rate": 1.7719962238505779e-06,
"loss": 0.0166,
"sft_loss": 0.002009772404562682,
"step": 1590,
"total_loss": 0.0022419645182367278,
"value_loss": 0.0023219212426283777,
"value_loss_search": 0.0064497248539396425,
"value_loss_thought": 0.01212564545467103
},
{
"epoch": 9.52,
"grad_norm": 0.29633516039709396,
"learning_rate": 1.7553467703340755e-06,
"loss": 0.017,
"sft_loss": 0.0015611476090271025,
"step": 1595,
"total_loss": 0.0016733453244299312,
"value_loss": 0.0011219771564128678,
"value_loss_search": 0.0053791521318601095,
"value_loss_thought": 0.0035966651016906327
},
{
"epoch": 9.55,
"grad_norm": 0.38962016205008826,
"learning_rate": 1.7387334814662452e-06,
"loss": 0.0168,
"sft_loss": 0.002254475053632632,
"step": 1600,
"total_loss": 0.0023654911761866516,
"value_loss": 0.0011101611622962083,
"value_loss_search": 0.004612163749004594,
"value_loss_thought": 0.004269125514247208
},
{
"epoch": 9.58,
"grad_norm": 0.38388365774795874,
"learning_rate": 1.7221571640840562e-06,
"loss": 0.0176,
"sft_loss": 0.0018735320656560362,
"step": 1605,
"total_loss": 0.0019748406046375066,
"value_loss": 0.0010130854097496922,
"value_loss_search": 0.003405629392977971,
"value_loss_thought": 0.004699053899685168
},
{
"epoch": 9.61,
"grad_norm": 0.4095939751375467,
"learning_rate": 1.7056186232289298e-06,
"loss": 0.0166,
"sft_loss": 0.0022918267059139907,
"step": 1610,
"total_loss": 0.0024106179324689947,
"value_loss": 0.001187912198918184,
"value_loss_search": 0.004366835134328539,
"value_loss_thought": 0.005136462485006632
},
{
"epoch": 9.64,
"grad_norm": 0.3302901304739707,
"learning_rate": 1.6891186621076433e-06,
"loss": 0.0186,
"sft_loss": 0.002024157461710274,
"step": 1615,
"total_loss": 0.0021383855524845785,
"value_loss": 0.001142280875455981,
"value_loss_search": 0.004602078883806371,
"value_loss_thought": 0.004536168186041323
},
{
"epoch": 9.67,
"grad_norm": 0.3679502879592282,
"learning_rate": 1.6726580820533155e-06,
"loss": 0.0159,
"sft_loss": 0.0017695592978270724,
"step": 1620,
"total_loss": 0.0018712303529298425,
"value_loss": 0.0010167105092577344,
"value_loss_search": 0.003538572788136207,
"value_loss_thought": 0.004595111271009955
},
{
"epoch": 9.7,
"grad_norm": 0.30533989281219753,
"learning_rate": 1.6562376824864985e-06,
"loss": 0.0166,
"sft_loss": 0.0025343591201817616,
"step": 1625,
"total_loss": 0.002648697585539139,
"value_loss": 0.0011433845557576206,
"value_loss_search": 0.004449716299927786,
"value_loss_thought": 0.004697360047975963
},
{
"epoch": 9.73,
"grad_norm": 0.2877518184615656,
"learning_rate": 1.6398582608763457e-06,
"loss": 0.0179,
"sft_loss": 0.0024212473537772892,
"step": 1630,
"total_loss": 0.0025349426502771165,
"value_loss": 0.0011369530704428144,
"value_loss_search": 0.0051415300209441735,
"value_loss_thought": 0.003954094471146164
},
{
"epoch": 9.76,
"grad_norm": 0.36027684134993015,
"learning_rate": 1.6235206127018865e-06,
"loss": 0.016,
"sft_loss": 0.002206899574957788,
"step": 1635,
"total_loss": 0.0022886144271467403,
"value_loss": 0.000817148587839256,
"value_loss_search": 0.003036996565958816,
"value_loss_thought": 0.003500192180490558
},
{
"epoch": 9.79,
"grad_norm": 0.35076644651015965,
"learning_rate": 1.6072255314133921e-06,
"loss": 0.0173,
"sft_loss": 0.0019144602090818807,
"step": 1640,
"total_loss": 0.002069194418021425,
"value_loss": 0.0015473420381795222,
"value_loss_search": 0.005170887146491054,
"value_loss_thought": 0.007207849150842094
},
{
"epoch": 9.82,
"grad_norm": 0.30979208746416814,
"learning_rate": 1.5909738083938387e-06,
"loss": 0.0181,
"sft_loss": 0.0023195294284960254,
"step": 1645,
"total_loss": 0.002506226720114313,
"value_loss": 0.0018669727418881622,
"value_loss_search": 0.004914225066238486,
"value_loss_thought": 0.010021556961669375
},
{
"epoch": 9.85,
"grad_norm": 0.3414912343606952,
"learning_rate": 1.5747662329204758e-06,
"loss": 0.0164,
"sft_loss": 0.0017500042042229325,
"step": 1650,
"total_loss": 0.0018422375416491832,
"value_loss": 0.0009223333461136462,
"value_loss_search": 0.003468296695177742,
"value_loss_thought": 0.003910370041808164
},
{
"epoch": 9.88,
"grad_norm": 0.34143690574927327,
"learning_rate": 1.5586035921264952e-06,
"loss": 0.0167,
"sft_loss": 0.001885048404801637,
"step": 1655,
"total_loss": 0.0019954120705705236,
"value_loss": 0.001103636745813219,
"value_loss_search": 0.004034641608018319,
"value_loss_thought": 0.004794452414535045
},
{
"epoch": 9.91,
"grad_norm": 0.3940780464860908,
"learning_rate": 1.5424866709628018e-06,
"loss": 0.0167,
"sft_loss": 0.0023443336365744473,
"step": 1660,
"total_loss": 0.0025089123803297753,
"value_loss": 0.001645787319603187,
"value_loss_search": 0.005139281411049979,
"value_loss_thought": 0.008027016961932532
},
{
"epoch": 9.94,
"grad_norm": 0.28513052875466016,
"learning_rate": 1.5264162521598893e-06,
"loss": 0.017,
"sft_loss": 0.002210353355621919,
"step": 1665,
"total_loss": 0.0023033933971760233,
"value_loss": 0.0009304003870511223,
"value_loss_search": 0.004236812041176563,
"value_loss_thought": 0.00320639110132106
},
{
"epoch": 9.97,
"grad_norm": 0.6554785763050915,
"learning_rate": 1.5103931161898321e-06,
"loss": 0.017,
"sft_loss": 0.001787349657388404,
"step": 1670,
"total_loss": 0.0019071295019472245,
"value_loss": 0.0011977983157066773,
"value_loss_search": 0.003837722380649211,
"value_loss_thought": 0.0057446641365231695
},
{
"epoch": 10.0,
"grad_norm": 0.47343410940009084,
"learning_rate": 1.4944180412283765e-06,
"loss": 0.0173,
"sft_loss": 0.0018310324900085106,
"step": 1675,
"total_loss": 0.001928851098625728,
"value_loss": 0.0009781861556859895,
"value_loss_search": 0.00370205105889454,
"value_loss_thought": 0.004123438105875721
},
{
"epoch": 10.03,
"grad_norm": 0.3191231605103231,
"learning_rate": 1.4784918031171507e-06,
"loss": 0.0138,
"sft_loss": 0.0015405306039610878,
"step": 1680,
"total_loss": 0.0016421698385045147,
"value_loss": 0.0010163923268578401,
"value_loss_search": 0.0045566821872171205,
"value_loss_thought": 0.003574456366766299
},
{
"epoch": 10.06,
"grad_norm": 0.19891894504058574,
"learning_rate": 1.4626151753259826e-06,
"loss": 0.0138,
"sft_loss": 0.001970075577264652,
"step": 1685,
"total_loss": 0.0020514145978665966,
"value_loss": 0.0008133902773010959,
"value_loss_search": 0.0033379198979332616,
"value_loss_thought": 0.003169202328041365
},
{
"epoch": 10.09,
"grad_norm": 0.2844490093173974,
"learning_rate": 1.4467889289153372e-06,
"loss": 0.0132,
"sft_loss": 0.0011790773802204057,
"step": 1690,
"total_loss": 0.0012578035701309887,
"value_loss": 0.0007872618921737739,
"value_loss_search": 0.0035334496231712365,
"value_loss_thought": 0.0027646455130707183
},
{
"epoch": 10.12,
"grad_norm": 0.22486418307955425,
"learning_rate": 1.4310138324988727e-06,
"loss": 0.0132,
"sft_loss": 0.0016656344232615083,
"step": 1695,
"total_loss": 0.0017502345744446757,
"value_loss": 0.0008460015417313116,
"value_loss_search": 0.0036587552590958694,
"value_loss_thought": 0.0031092570511646045
},
{
"epoch": 10.15,
"grad_norm": 0.2753979527731448,
"learning_rate": 1.415290652206105e-06,
"loss": 0.0143,
"sft_loss": 0.0017361613281536847,
"step": 1700,
"total_loss": 0.0018177182257247183,
"value_loss": 0.0008155690053840203,
"value_loss_search": 0.003209375508242829,
"value_loss_thought": 0.003315176499722838
},
{
"epoch": 10.18,
"grad_norm": 0.2572570727278164,
"learning_rate": 1.3996201516452062e-06,
"loss": 0.0137,
"sft_loss": 0.001388478121953085,
"step": 1705,
"total_loss": 0.0014834851837221663,
"value_loss": 0.0009500706352980615,
"value_loss_search": 0.0030097221551613983,
"value_loss_thought": 0.004590843019104796
},
{
"epoch": 10.21,
"grad_norm": 0.27025436045593615,
"learning_rate": 1.3840030918659174e-06,
"loss": 0.0147,
"sft_loss": 0.0013848344882717357,
"step": 1710,
"total_loss": 0.0014918723345459738,
"value_loss": 0.0010703784395445838,
"value_loss_search": 0.0053198799042775136,
"value_loss_thought": 0.0032431475258817956
},
{
"epoch": 10.23,
"grad_norm": 0.2813750257238534,
"learning_rate": 1.3684402313225858e-06,
"loss": 0.014,
"sft_loss": 0.0020835736999288202,
"step": 1715,
"total_loss": 0.002167087908361509,
"value_loss": 0.0008351421248391944,
"value_loss_search": 0.003797487074473338,
"value_loss_thought": 0.0028836499267299587
},
{
"epoch": 10.26,
"grad_norm": 0.23513053578061044,
"learning_rate": 1.3529323258373347e-06,
"loss": 0.0156,
"sft_loss": 0.001715753084863536,
"step": 1720,
"total_loss": 0.0017993840303944352,
"value_loss": 0.0008363092606941791,
"value_loss_search": 0.0030788765871307077,
"value_loss_thought": 0.003611597566623459
},
{
"epoch": 10.29,
"grad_norm": 0.23227494152180417,
"learning_rate": 1.3374801285633498e-06,
"loss": 0.014,
"sft_loss": 0.001370953200967051,
"step": 1725,
"total_loss": 0.0014732162911762713,
"value_loss": 0.0010226307487414487,
"value_loss_search": 0.0036572990339521993,
"value_loss_thought": 0.004523746974246024
},
{
"epoch": 10.32,
"grad_norm": 0.27072849780059804,
"learning_rate": 1.3220843899483093e-06,
"loss": 0.0132,
"sft_loss": 0.002192886942066252,
"step": 1730,
"total_loss": 0.0022738821006910827,
"value_loss": 0.0008099516685206254,
"value_loss_search": 0.0027381081928751884,
"value_loss_thought": 0.0037415051008338196
},
{
"epoch": 10.35,
"grad_norm": 0.23754108921707667,
"learning_rate": 1.3067458576979305e-06,
"loss": 0.0137,
"sft_loss": 0.0013820198219036683,
"step": 1735,
"total_loss": 0.0014648040350152768,
"value_loss": 0.0008278420881822513,
"value_loss_search": 0.002561309584873328,
"value_loss_thought": 0.004061427062742951
},
{
"epoch": 10.38,
"grad_norm": 0.36161029888838353,
"learning_rate": 1.2914652767396602e-06,
"loss": 0.0134,
"sft_loss": 0.0016668380645569413,
"step": 1740,
"total_loss": 0.0017722387631607718,
"value_loss": 0.0010540070087955654,
"value_loss_search": 0.003300642109496721,
"value_loss_thought": 0.005131414054389882
},
{
"epoch": 10.41,
"grad_norm": 0.2974708087969646,
"learning_rate": 1.2762433891865e-06,
"loss": 0.0143,
"sft_loss": 0.0014268789207562804,
"step": 1745,
"total_loss": 0.0015154178811258134,
"value_loss": 0.0008853895097672648,
"value_loss_search": 0.0027578251025261124,
"value_loss_thought": 0.004325291005079635
},
{
"epoch": 10.44,
"grad_norm": 0.3676755156415561,
"learning_rate": 1.2610809343009588e-06,
"loss": 0.0153,
"sft_loss": 0.0014077324420213699,
"step": 1750,
"total_loss": 0.0015097191839743118,
"value_loss": 0.001019867208327696,
"value_loss_search": 0.003453613588283133,
"value_loss_thought": 0.004705324086262408
},
{
"epoch": 10.47,
"grad_norm": 0.35317661644500653,
"learning_rate": 1.2459786484591535e-06,
"loss": 0.0138,
"sft_loss": 0.0014058848028071225,
"step": 1755,
"total_loss": 0.0014983110793011178,
"value_loss": 0.0009242627733442532,
"value_loss_search": 0.0036805602864205867,
"value_loss_thought": 0.0037135419420565086
},
{
"epoch": 10.5,
"grad_norm": 0.3090797575849808,
"learning_rate": 1.2309372651150456e-06,
"loss": 0.0143,
"sft_loss": 0.0013164847245207057,
"step": 1760,
"total_loss": 0.0014013042361533223,
"value_loss": 0.0008481952301963247,
"value_loss_search": 0.0033867947210950433,
"value_loss_thought": 0.0033987670644137326
},
{
"epoch": 10.53,
"grad_norm": 0.3792937092146306,
"learning_rate": 1.2159575147648226e-06,
"loss": 0.0138,
"sft_loss": 0.0010253429325530305,
"step": 1765,
"total_loss": 0.0011080630618408803,
"value_loss": 0.0008272012292422914,
"value_loss_search": 0.003076424640460118,
"value_loss_thought": 0.0035411851821436358
},
{
"epoch": 10.56,
"grad_norm": 0.23588251371295496,
"learning_rate": 1.2010401249114166e-06,
"loss": 0.0154,
"sft_loss": 0.002121089934371412,
"step": 1770,
"total_loss": 0.002200343585804987,
"value_loss": 0.0007925363796857709,
"value_loss_search": 0.0028656800206931623,
"value_loss_thought": 0.0034746110001947273
},
{
"epoch": 10.59,
"grad_norm": 0.18704537996101944,
"learning_rate": 1.1861858200291754e-06,
"loss": 0.015,
"sft_loss": 0.001497958108666353,
"step": 1775,
"total_loss": 0.0015780455702179453,
"value_loss": 0.0008008746483028518,
"value_loss_search": 0.003131278493640366,
"value_loss_thought": 0.0032757186703292972
},
{
"epoch": 10.62,
"grad_norm": 0.2611143768462882,
"learning_rate": 1.1713953215286786e-06,
"loss": 0.0146,
"sft_loss": 0.0016566726000746713,
"step": 1780,
"total_loss": 0.0017335941357572437,
"value_loss": 0.000769215394677758,
"value_loss_search": 0.0035173230212080854,
"value_loss_thought": 0.0026364001052343157
},
{
"epoch": 10.65,
"grad_norm": 0.22627379212344326,
"learning_rate": 1.156669347721698e-06,
"loss": 0.0139,
"sft_loss": 0.0013101978547638282,
"step": 1785,
"total_loss": 0.0014014345401903937,
"value_loss": 0.0009123669141331448,
"value_loss_search": 0.0029923125522913095,
"value_loss_thought": 0.004306622803687788
},
{
"epoch": 10.68,
"grad_norm": 0.2755772125737796,
"learning_rate": 1.1420086137863187e-06,
"loss": 0.0147,
"sft_loss": 0.0017199999798322096,
"step": 1790,
"total_loss": 0.0018377338240952667,
"value_loss": 0.0011773384410275866,
"value_loss_search": 0.004846148737328804,
"value_loss_thought": 0.004572558852294151
},
{
"epoch": 10.71,
"grad_norm": 0.32880460241811366,
"learning_rate": 1.127413831732198e-06,
"loss": 0.0139,
"sft_loss": 0.001667250582249835,
"step": 1795,
"total_loss": 0.0017713467202923993,
"value_loss": 0.0010409614306340132,
"value_loss_search": 0.0038816104845068367,
"value_loss_thought": 0.00444608086813787
},
{
"epoch": 10.74,
"grad_norm": 0.31078844068026645,
"learning_rate": 1.1128857103659924e-06,
"loss": 0.0148,
"sft_loss": 0.0020905163779389112,
"step": 1800,
"total_loss": 0.0021922153910395536,
"value_loss": 0.0010169901736389875,
"value_loss_search": 0.0040109903552888685,
"value_loss_thought": 0.004124930962098006
},
{
"epoch": 10.77,
"grad_norm": 0.25337131057461965,
"learning_rate": 1.098424955256929e-06,
"loss": 0.0146,
"sft_loss": 0.0014486652828054503,
"step": 1805,
"total_loss": 0.0015391261362040609,
"value_loss": 0.0009046085265993042,
"value_loss_search": 0.0039972110742269255,
"value_loss_thought": 0.0032396571875779046
},
{
"epoch": 10.8,
"grad_norm": 0.27541225586561385,
"learning_rate": 1.084032268702546e-06,
"loss": 0.016,
"sft_loss": 0.0018587965198094026,
"step": 1810,
"total_loss": 0.0019315761407113995,
"value_loss": 0.0007277963281012489,
"value_loss_search": 0.002842529457825549,
"value_loss_thought": 0.0029798412312629806
},
{
"epoch": 10.83,
"grad_norm": 0.3143782682726898,
"learning_rate": 1.0697083496945766e-06,
"loss": 0.0147,
"sft_loss": 0.0016238773765508085,
"step": 1815,
"total_loss": 0.0017014974511752711,
"value_loss": 0.0007762006878408556,
"value_loss_search": 0.003306770018977545,
"value_loss_thought": 0.002902835555346428
},
{
"epoch": 10.86,
"grad_norm": 0.20528023876815246,
"learning_rate": 1.0554538938850067e-06,
"loss": 0.0127,
"sft_loss": 0.0010624434828059748,
"step": 1820,
"total_loss": 0.0011552063171336613,
"value_loss": 0.0009276282694486327,
"value_loss_search": 0.0039089625636478335,
"value_loss_thought": 0.0035120635659950496
},
{
"epoch": 10.89,
"grad_norm": 0.3193867192070165,
"learning_rate": 1.0412695935522915e-06,
"loss": 0.0139,
"sft_loss": 0.0015649513312382623,
"step": 1825,
"total_loss": 0.001644154928186481,
"value_loss": 0.0007920360109665125,
"value_loss_search": 0.003427566871346244,
"value_loss_thought": 0.0029087212127251404
},
{
"epoch": 10.92,
"grad_norm": 0.2485596631221815,
"learning_rate": 1.0271561375677295e-06,
"loss": 0.0141,
"sft_loss": 0.0012402831809595228,
"step": 1830,
"total_loss": 0.0013410342163723498,
"value_loss": 0.0010075102685675574,
"value_loss_search": 0.0032375619698996163,
"value_loss_thought": 0.004822520132569253
},
{
"epoch": 10.95,
"grad_norm": 0.19629796573946343,
"learning_rate": 1.0131142113620124e-06,
"loss": 0.0145,
"sft_loss": 0.0017903442290844395,
"step": 1835,
"total_loss": 0.0018783345309895338,
"value_loss": 0.0008799031274634217,
"value_loss_search": 0.002568782726038421,
"value_loss_thought": 0.00447044234929308
},
{
"epoch": 10.98,
"grad_norm": 0.22665789788022606,
"learning_rate": 9.991444968919318e-07,
"loss": 0.016,
"sft_loss": 0.001909774899831973,
"step": 1840,
"total_loss": 0.0020184893559246574,
"value_loss": 0.0010871445186012353,
"value_loss_search": 0.0036259381746958754,
"value_loss_thought": 0.005071217814185047
},
{
"epoch": 11.01,
"grad_norm": 0.14121531206089535,
"learning_rate": 9.85247672607262e-07,
"loss": 0.0135,
"sft_loss": 0.0015843365341424941,
"step": 1845,
"total_loss": 0.0016597445963839163,
"value_loss": 0.0007540804324889904,
"value_loss_search": 0.00248998072215727,
"value_loss_thought": 0.0035426627241349705
},
{
"epoch": 11.04,
"grad_norm": 0.23904440865103438,
"learning_rate": 9.714244134178111e-07,
"loss": 0.0121,
"sft_loss": 0.001681867046863772,
"step": 1850,
"total_loss": 0.0017365749472503466,
"value_loss": 0.000547078986141969,
"value_loss_search": 0.002276956337368574,
"value_loss_thought": 0.0020996756104807446
},
{
"epoch": 11.07,
"grad_norm": 0.21369722290951315,
"learning_rate": 9.576753906606406e-07,
"loss": 0.0132,
"sft_loss": 0.001991357470978983,
"step": 1855,
"total_loss": 0.0020535454949708763,
"value_loss": 0.0006218802197111017,
"value_loss_search": 0.002578609814440824,
"value_loss_thought": 0.002396431967076751
},
{
"epoch": 11.1,
"grad_norm": 0.23349687558798843,
"learning_rate": 9.440012720674669e-07,
"loss": 0.0134,
"sft_loss": 0.0016908970777876676,
"step": 1860,
"total_loss": 0.0017617013153142125,
"value_loss": 0.0007080423307911588,
"value_loss_search": 0.002591455921628949,
"value_loss_thought": 0.003072882712245928
},
{
"epoch": 11.13,
"grad_norm": 0.2315106358683564,
"learning_rate": 9.304027217322248e-07,
"loss": 0.012,
"sft_loss": 0.0011010443093255162,
"step": 1865,
"total_loss": 0.001180766790723453,
"value_loss": 0.0007972249053409541,
"value_loss_search": 0.0028063702089980323,
"value_loss_thought": 0.003571429059320508
},
{
"epoch": 11.16,
"grad_norm": 0.19676507857751727,
"learning_rate": 9.168804000788231e-07,
"loss": 0.0134,
"sft_loss": 0.0017542458925163373,
"step": 1870,
"total_loss": 0.0018465204406766134,
"value_loss": 0.0009227454697338544,
"value_loss_search": 0.0034655480293684705,
"value_loss_thought": 0.003916415683534069
},
{
"epoch": 11.19,
"grad_norm": 0.20994293249210688,
"learning_rate": 9.034349638290643e-07,
"loss": 0.0129,
"sft_loss": 0.0016363047616323456,
"step": 1875,
"total_loss": 0.001720267212360227,
"value_loss": 0.000839624490978963,
"value_loss_search": 0.0023668420385547506,
"value_loss_thought": 0.004350153919847344
},
{
"epoch": 11.22,
"grad_norm": 0.17348271881719035,
"learning_rate": 8.90067065970753e-07,
"loss": 0.0117,
"sft_loss": 0.0015204125025775283,
"step": 1880,
"total_loss": 0.00157785642841759,
"value_loss": 0.0005744392913584306,
"value_loss_search": 0.002059218621488412,
"value_loss_thought": 0.0025362957071592973
},
{
"epoch": 11.25,
"grad_norm": 0.2244106678448663,
"learning_rate": 8.767773557259856e-07,
"loss": 0.0136,
"sft_loss": 0.0018200392310973256,
"step": 1885,
"total_loss": 0.0018952935279571647,
"value_loss": 0.0007525429511019866,
"value_loss_search": 0.0032326324541486428,
"value_loss_thought": 0.002787711161090556
},
{
"epoch": 11.28,
"grad_norm": 0.22278002454347204,
"learning_rate": 8.635664785196149e-07,
"loss": 0.0136,
"sft_loss": 0.001562042610021308,
"step": 1890,
"total_loss": 0.0016376418375557479,
"value_loss": 0.0007559922805285169,
"value_loss_search": 0.002203879163846523,
"value_loss_thought": 0.0038440591039261562
},
{
"epoch": 11.31,
"grad_norm": 0.16580402380208306,
"learning_rate": 8.504350759479085e-07,
"loss": 0.0132,
"sft_loss": 0.001630876283161342,
"step": 1895,
"total_loss": 0.0017215145897331752,
"value_loss": 0.0009063829461979367,
"value_loss_search": 0.0032823619387357893,
"value_loss_thought": 0.0039687016303560085
},
{
"epoch": 11.34,
"grad_norm": 0.1538178503850748,
"learning_rate": 8.373837857473876e-07,
"loss": 0.0124,
"sft_loss": 0.0015245357761159539,
"step": 1900,
"total_loss": 0.001594149377508103,
"value_loss": 0.0006961359632555286,
"value_loss_search": 0.0029453484618215953,
"value_loss_thought": 0.0026237392520670256
},
{
"epoch": 11.37,
"grad_norm": 0.22852979054995423,
"learning_rate": 8.244132417638572e-07,
"loss": 0.0139,
"sft_loss": 0.0018581276090117171,
"step": 1905,
"total_loss": 0.0019236322099402514,
"value_loss": 0.0006550459199843317,
"value_loss_search": 0.002184331477405976,
"value_loss_thought": 0.0030560358827074197
},
{
"epoch": 11.4,
"grad_norm": 0.17979282429716514,
"learning_rate": 8.115240739216182e-07,
"loss": 0.0135,
"sft_loss": 0.0016053789819125085,
"step": 1910,
"total_loss": 0.0016754228590059484,
"value_loss": 0.0007004386914104543,
"value_loss_search": 0.0024565162927842723,
"value_loss_thought": 0.003146993195559844
},
{
"epoch": 11.43,
"grad_norm": 0.2143766598366511,
"learning_rate": 7.987169081928808e-07,
"loss": 0.0113,
"sft_loss": 0.0012951746117323636,
"step": 1915,
"total_loss": 0.0013838895008234432,
"value_loss": 0.0008871489512898734,
"value_loss_search": 0.0030084542974748276,
"value_loss_thought": 0.004088737367703743
},
{
"epoch": 11.46,
"grad_norm": 0.16901429392294579,
"learning_rate": 7.859923665673577e-07,
"loss": 0.0138,
"sft_loss": 0.0017941196507308631,
"step": 1920,
"total_loss": 0.0018495924976690502,
"value_loss": 0.0005547285868487961,
"value_loss_search": 0.002195003875567636,
"value_loss_thought": 0.0022428248138567143
},
{
"epoch": 11.49,
"grad_norm": 0.14963676330146067,
"learning_rate": 7.733510670220592e-07,
"loss": 0.0127,
"sft_loss": 0.0016209140827413647,
"step": 1925,
"total_loss": 0.0016745970182368453,
"value_loss": 0.0005368294205496226,
"value_loss_search": 0.0021613026585526997,
"value_loss_thought": 0.0021333327193019612
},
{
"epoch": 11.52,
"grad_norm": 0.1560856898990678,
"learning_rate": 7.607936234912841e-07,
"loss": 0.0128,
"sft_loss": 0.0012635959719773382,
"step": 1930,
"total_loss": 0.0013598649700924171,
"value_loss": 0.0009626899960949231,
"value_loss_search": 0.003141054221191553,
"value_loss_thought": 0.004560465800022939
},
{
"epoch": 11.55,
"grad_norm": 0.1963172359932867,
"learning_rate": 7.48320645836797e-07,
"loss": 0.013,
"sft_loss": 0.001730994725949131,
"step": 1935,
"total_loss": 0.0018184438404333036,
"value_loss": 0.0008744911316171056,
"value_loss_search": 0.0031509772675512695,
"value_loss_thought": 0.003844951791325002
},
{
"epoch": 11.58,
"grad_norm": 0.2668333028531531,
"learning_rate": 7.359327398182145e-07,
"loss": 0.0129,
"sft_loss": 0.0013168820936698467,
"step": 1940,
"total_loss": 0.0013926543368199874,
"value_loss": 0.0007577225714385349,
"value_loss_search": 0.003037126008916857,
"value_loss_thought": 0.003024654608429955
},
{
"epoch": 11.61,
"grad_norm": 0.222868660291957,
"learning_rate": 7.236305070635835e-07,
"loss": 0.0143,
"sft_loss": 0.0016936144500505179,
"step": 1945,
"total_loss": 0.0017581287969392178,
"value_loss": 0.0006451433262554928,
"value_loss_search": 0.0023534947292660037,
"value_loss_thought": 0.002807651879811601
},
{
"epoch": 11.64,
"grad_norm": 0.361284370578401,
"learning_rate": 7.114145450401666e-07,
"loss": 0.0128,
"sft_loss": 0.001547012195806019,
"step": 1950,
"total_loss": 0.001618750787525869,
"value_loss": 0.000717385778938251,
"value_loss_search": 0.0024508829355568197,
"value_loss_thought": 0.0032882033142414002
},
{
"epoch": 11.67,
"grad_norm": 0.13552700047134153,
"learning_rate": 6.992854470254207e-07,
"loss": 0.013,
"sft_loss": 0.001385022871545516,
"step": 1955,
"total_loss": 0.0014542039817172282,
"value_loss": 0.0006918109331763844,
"value_loss_search": 0.0027121378843503407,
"value_loss_thought": 0.0028223496028431327
},
{
"epoch": 11.7,
"grad_norm": 0.22110946390490138,
"learning_rate": 6.872438020781855e-07,
"loss": 0.0133,
"sft_loss": 0.0019112714886432513,
"step": 1960,
"total_loss": 0.0021918389610931397,
"value_loss": 0.002805674577484751,
"value_loss_search": 0.0025011180584272098,
"value_loss_thought": 0.01994427887461825
},
{
"epoch": 11.73,
"grad_norm": 0.19717560920606997,
"learning_rate": 6.752901950100796e-07,
"loss": 0.0135,
"sft_loss": 0.0014557878545019775,
"step": 1965,
"total_loss": 0.0015295138876126658,
"value_loss": 0.0007372604449756182,
"value_loss_search": 0.0031169248416858864,
"value_loss_thought": 0.0027811587181304277
},
{
"epoch": 11.76,
"grad_norm": 0.19011787464804747,
"learning_rate": 6.634252063570909e-07,
"loss": 0.0133,
"sft_loss": 0.0018082802678691223,
"step": 1970,
"total_loss": 0.0018666912741366558,
"value_loss": 0.0005841100078200157,
"value_loss_search": 0.0022486982484906546,
"value_loss_thought": 0.0024241818446625986
},
{
"epoch": 11.79,
"grad_norm": 0.1689996116493542,
"learning_rate": 6.516494123513911e-07,
"loss": 0.0121,
"sft_loss": 0.0014497063646558672,
"step": 1975,
"total_loss": 0.0015385708714291014,
"value_loss": 0.0008886449425062892,
"value_loss_search": 0.0024655752657849915,
"value_loss_thought": 0.0046435843172275785
},
{
"epoch": 11.82,
"grad_norm": 0.15943109781856352,
"learning_rate": 6.399633848933434e-07,
"loss": 0.0133,
"sft_loss": 0.0014925144583685323,
"step": 1980,
"total_loss": 0.0015526211048978666,
"value_loss": 0.0006010664543737221,
"value_loss_search": 0.0027871506780570597,
"value_loss_thought": 0.0020213810148561606
},
{
"epoch": 11.85,
"grad_norm": 0.16717165760347577,
"learning_rate": 6.283676915237307e-07,
"loss": 0.013,
"sft_loss": 0.001696960357367061,
"step": 1985,
"total_loss": 0.0017603883717299596,
"value_loss": 0.0006342800129914394,
"value_loss_search": 0.002824323731066869,
"value_loss_thought": 0.0022499163406337177
},
{
"epoch": 11.88,
"grad_norm": 0.15142680254994328,
"learning_rate": 6.16862895396193e-07,
"loss": 0.0139,
"sft_loss": 0.001581608026754111,
"step": 1990,
"total_loss": 0.0016601021626520662,
"value_loss": 0.0007849412620771545,
"value_loss_search": 0.002948937653894745,
"value_loss_thought": 0.003330592447127856
},
{
"epoch": 11.91,
"grad_norm": 0.19276901421529766,
"learning_rate": 6.054495552498779e-07,
"loss": 0.0137,
"sft_loss": 0.0018120755994459614,
"step": 1995,
"total_loss": 0.001886463062464827,
"value_loss": 0.0007438746942170837,
"value_loss_search": 0.0030197886319228927,
"value_loss_thought": 0.0029312088817277982
},
{
"epoch": 11.94,
"grad_norm": 0.15841124656299638,
"learning_rate": 5.941282253823019e-07,
"loss": 0.0124,
"sft_loss": 0.0017132775596110151,
"step": 2000,
"total_loss": 0.0018226395098338345,
"value_loss": 0.0010936193576071673,
"value_loss_search": 0.004504057419785568,
"value_loss_thought": 0.004244897459489039
},
{
"epoch": 11.97,
"grad_norm": 0.19080328469455396,
"learning_rate": 5.828994556224333e-07,
"loss": 0.0133,
"sft_loss": 0.0016100038104923443,
"step": 2005,
"total_loss": 0.001695921379121046,
"value_loss": 0.0008591757061367389,
"value_loss_search": 0.0032462079869560513,
"value_loss_thought": 0.0036271976513944535
},
{
"epoch": 12.0,
"grad_norm": 0.42540669262730385,
"learning_rate": 5.717637913039895e-07,
"loss": 0.0142,
"sft_loss": 0.0014130673225736246,
"step": 2010,
"total_loss": 0.001474345298233004,
"value_loss": 0.0006127797149346748,
"value_loss_search": 0.0022098295013165624,
"value_loss_thought": 0.002692408211157726
},
{
"epoch": 12.03,
"grad_norm": 0.13234597224013306,
"learning_rate": 5.607217732389503e-07,
"loss": 0.0134,
"sft_loss": 0.00153753467311617,
"step": 2015,
"total_loss": 0.0015983398959235728,
"value_loss": 0.0006080522839738478,
"value_loss_search": 0.002334933211577095,
"value_loss_thought": 0.002529485058130376
},
{
"epoch": 12.06,
"grad_norm": 0.137160131767248,
"learning_rate": 5.497739376912956e-07,
"loss": 0.0109,
"sft_loss": 0.0011861874081660062,
"step": 2020,
"total_loss": 0.0012396710289571899,
"value_loss": 0.0005348361521200218,
"value_loss_search": 0.0019204338413487676,
"value_loss_thought": 0.002358255369199469
},
{
"epoch": 12.09,
"grad_norm": 0.12635424611743207,
"learning_rate": 5.389208163509585e-07,
"loss": 0.0132,
"sft_loss": 0.0018190979899372905,
"step": 2025,
"total_loss": 0.001879164192882854,
"value_loss": 0.0006006620240896155,
"value_loss_search": 0.0024004459903380847,
"value_loss_thought": 0.002404850223280164
},
{
"epoch": 12.11,
"grad_norm": 0.18160270722099203,
"learning_rate": 5.281629363080054e-07,
"loss": 0.0122,
"sft_loss": 0.0015674298861995339,
"step": 2030,
"total_loss": 0.001623824713238875,
"value_loss": 0.0005639481589923889,
"value_loss_search": 0.002164925300516529,
"value_loss_thought": 0.002346659959630415
},
{
"epoch": 12.14,
"grad_norm": 0.1417342739770937,
"learning_rate": 5.175008200270368e-07,
"loss": 0.0125,
"sft_loss": 0.0014136525540379807,
"step": 2035,
"total_loss": 0.0014688616164136193,
"value_loss": 0.0005520905897583361,
"value_loss_search": 0.0017787457187345979,
"value_loss_thought": 0.0026379790163218787
},
{
"epoch": 12.17,
"grad_norm": 0.1290358512992214,
"learning_rate": 5.06934985321813e-07,
"loss": 0.0113,
"sft_loss": 0.0016105213377159088,
"step": 2040,
"total_loss": 0.0016679565724615486,
"value_loss": 0.0005743524450053883,
"value_loss_search": 0.0021123728056977598,
"value_loss_thought": 0.0024824467762755375
},
{
"epoch": 12.2,
"grad_norm": 0.17303760430312853,
"learning_rate": 4.964659453301088e-07,
"loss": 0.0125,
"sft_loss": 0.0017713219043798746,
"step": 2045,
"total_loss": 0.001820996348112658,
"value_loss": 0.0004967445772763313,
"value_loss_search": 0.0021500766970802944,
"value_loss_thought": 0.0018238799311802723
},
{
"epoch": 12.23,
"grad_norm": 0.13656860205410482,
"learning_rate": 4.860942084887868e-07,
"loss": 0.012,
"sft_loss": 0.0011710747960023582,
"step": 2050,
"total_loss": 0.0012223293429954652,
"value_loss": 0.0005125455333654827,
"value_loss_search": 0.0025454429445233018,
"value_loss_thought": 0.0015549213211500045
},
{
"epoch": 12.26,
"grad_norm": 0.17146623033662603,
"learning_rate": 4.758202785091118e-07,
"loss": 0.0122,
"sft_loss": 0.0015416829177411274,
"step": 2055,
"total_loss": 0.0015948826805882276,
"value_loss": 0.0005319975216764306,
"value_loss_search": 0.0020023466028078474,
"value_loss_thought": 0.002253633565032942
},
{
"epoch": 12.29,
"grad_norm": 0.13131858410389194,
"learning_rate": 4.656446543522822e-07,
"loss": 0.0114,
"sft_loss": 0.0015963483776431531,
"step": 2060,
"total_loss": 0.0016545026784314132,
"value_loss": 0.000581543093721848,
"value_loss_search": 0.002123642799631398,
"value_loss_thought": 0.00252870192825867
},
{
"epoch": 12.32,
"grad_norm": 0.1387225576415455,
"learning_rate": 4.555678302051988e-07,
"loss": 0.0125,
"sft_loss": 0.0011560205864952877,
"step": 2065,
"total_loss": 0.0012139756489808918,
"value_loss": 0.0005795506685103647,
"value_loss_search": 0.0022142785405321773,
"value_loss_thought": 0.0024221268018152385
},
{
"epoch": 12.35,
"grad_norm": 0.13838555141205885,
"learning_rate": 4.4559029545646835e-07,
"loss": 0.0128,
"sft_loss": 0.0019845320377498863,
"step": 2070,
"total_loss": 0.002043302868487018,
"value_loss": 0.0005877082873098516,
"value_loss_search": 0.001886277649805379,
"value_loss_thought": 0.0028153886438985866
},
{
"epoch": 12.38,
"grad_norm": 0.1372443532006686,
"learning_rate": 4.357125346726293e-07,
"loss": 0.013,
"sft_loss": 0.001509127317694947,
"step": 2075,
"total_loss": 0.0015627349823276403,
"value_loss": 0.0005360765391628775,
"value_loss_search": 0.0020487196703186327,
"value_loss_thought": 0.0022398926420464705
},
{
"epoch": 12.41,
"grad_norm": 0.13844786045080867,
"learning_rate": 4.2593502757462326e-07,
"loss": 0.0122,
"sft_loss": 0.0017603133688680827,
"step": 2080,
"total_loss": 0.0018168458402463728,
"value_loss": 0.0005653246701513126,
"value_loss_search": 0.0017796060631980026,
"value_loss_thought": 0.0027429913518062675
},
{
"epoch": 12.44,
"grad_norm": 0.13268164718583428,
"learning_rate": 4.162582490144948e-07,
"loss": 0.0134,
"sft_loss": 0.001744911610148847,
"step": 2085,
"total_loss": 0.001791581775782447,
"value_loss": 0.0004667015931431706,
"value_loss_search": 0.002076370053646315,
"value_loss_thought": 0.0016572426828588504
},
{
"epoch": 12.47,
"grad_norm": 0.1639448919467817,
"learning_rate": 4.066826689523329e-07,
"loss": 0.0124,
"sft_loss": 0.0014559761038981378,
"step": 2090,
"total_loss": 0.0015073618816018098,
"value_loss": 0.0005138578154856077,
"value_loss_search": 0.0021935081386459387,
"value_loss_thought": 0.0019173543787928792
},
{
"epoch": 12.5,
"grad_norm": 0.12404791787908588,
"learning_rate": 3.972087524334417e-07,
"loss": 0.0129,
"sft_loss": 0.0014528931671520696,
"step": 2095,
"total_loss": 0.0015010984525360982,
"value_loss": 0.00048205279412059097,
"value_loss_search": 0.0017939364208864107,
"value_loss_thought": 0.002062485937221936
},
{
"epoch": 12.53,
"grad_norm": 0.11913134655746999,
"learning_rate": 3.8783695956576104e-07,
"loss": 0.0124,
"sft_loss": 0.0014827497507212684,
"step": 2100,
"total_loss": 0.001538775590775998,
"value_loss": 0.0005602583627023705,
"value_loss_search": 0.0021692122728495635,
"value_loss_thought": 0.0023128546302928045
},
{
"epoch": 12.56,
"grad_norm": 0.2114888612576018,
"learning_rate": 3.785677454975162e-07,
"loss": 0.0129,
"sft_loss": 0.0016362351918360219,
"step": 2105,
"total_loss": 0.0016834996304623928,
"value_loss": 0.00047264444830261707,
"value_loss_search": 0.0016264815088106843,
"value_loss_thought": 0.002154674075870844
},
{
"epoch": 12.59,
"grad_norm": 0.15216955248234498,
"learning_rate": 3.6940156039511536e-07,
"loss": 0.0131,
"sft_loss": 0.0011432622733991594,
"step": 2110,
"total_loss": 0.001217301837471041,
"value_loss": 0.0007403955911058802,
"value_loss_search": 0.003449674524733837,
"value_loss_thought": 0.00247349016779026
},
{
"epoch": 12.62,
"grad_norm": 0.16263392439324273,
"learning_rate": 3.603388494212892e-07,
"loss": 0.013,
"sft_loss": 0.0014238910138374195,
"step": 2115,
"total_loss": 0.001483250133594538,
"value_loss": 0.0005935910872267413,
"value_loss_search": 0.0018694708225950763,
"value_loss_thought": 0.0028792579010541884
},
{
"epoch": 12.65,
"grad_norm": 0.1366455163894168,
"learning_rate": 3.5138005271346643e-07,
"loss": 0.0125,
"sft_loss": 0.0016499412042321638,
"step": 2120,
"total_loss": 0.001711774785312059,
"value_loss": 0.0006183358492023672,
"value_loss_search": 0.002253155542811669,
"value_loss_thought": 0.002693531273280314
},
{
"epoch": 12.68,
"grad_norm": 0.1723979401773517,
"learning_rate": 3.425256053624013e-07,
"loss": 0.0136,
"sft_loss": 0.0012428588001057506,
"step": 2125,
"total_loss": 0.0012996991815896308,
"value_loss": 0.0005684036681486759,
"value_loss_search": 0.002460998028504946,
"value_loss_thought": 0.002086231320936349
},
{
"epoch": 12.71,
"grad_norm": 0.13838742752770622,
"learning_rate": 3.3377593739104207e-07,
"loss": 0.0124,
"sft_loss": 0.001705869528814219,
"step": 2130,
"total_loss": 0.0017658297125933587,
"value_loss": 0.0005996018458176878,
"value_loss_search": 0.0025955964968829904,
"value_loss_thought": 0.002201218291384066
},
{
"epoch": 12.74,
"grad_norm": 0.11587188180305923,
"learning_rate": 3.2513147373364864e-07,
"loss": 0.0119,
"sft_loss": 0.001407682741410099,
"step": 2135,
"total_loss": 0.0014551524436541285,
"value_loss": 0.0004746971244514953,
"value_loss_search": 0.0019987275512676206,
"value_loss_thought": 0.0017988494690371227
},
{
"epoch": 12.77,
"grad_norm": 0.14689001469256818,
"learning_rate": 3.165926342151518e-07,
"loss": 0.0123,
"sft_loss": 0.0012021351605653763,
"step": 2140,
"total_loss": 0.0012947583980004395,
"value_loss": 0.0009262323781513259,
"value_loss_search": 0.002333370926893963,
"value_loss_thought": 0.005076488072023722
},
{
"epoch": 12.8,
"grad_norm": 0.19822181831478017,
"learning_rate": 3.0815983353076647e-07,
"loss": 0.0128,
"sft_loss": 0.0016310029430314898,
"step": 2145,
"total_loss": 0.0016860840906559814,
"value_loss": 0.0005508116572968902,
"value_loss_search": 0.0021066489629220086,
"value_loss_thought": 0.0022998442878133575
},
{
"epoch": 12.83,
"grad_norm": 0.16816840054773924,
"learning_rate": 2.998334812258524e-07,
"loss": 0.0108,
"sft_loss": 0.0014653883612481878,
"step": 2150,
"total_loss": 0.0015446662450301573,
"value_loss": 0.0007927789123186813,
"value_loss_search": 0.0023768133905377907,
"value_loss_thought": 0.00396541793236338
},
{
"epoch": 12.86,
"grad_norm": 0.16375843499526804,
"learning_rate": 2.9161398167602053e-07,
"loss": 0.0126,
"sft_loss": 0.001740490208612755,
"step": 2155,
"total_loss": 0.0018277797405071184,
"value_loss": 0.0008728954095772679,
"value_loss_search": 0.0033813179442745422,
"value_loss_thought": 0.003601845331900222
},
{
"epoch": 12.89,
"grad_norm": 0.24414748866003813,
"learning_rate": 2.8350173406749975e-07,
"loss": 0.0131,
"sft_loss": 0.0017086814332287759,
"step": 2160,
"total_loss": 0.001792904332711487,
"value_loss": 0.0008422289131431171,
"value_loss_search": 0.002163193000239971,
"value_loss_thought": 0.0045746383284949845
},
{
"epoch": 12.92,
"grad_norm": 0.14738499094459379,
"learning_rate": 2.75497132377745e-07,
"loss": 0.0126,
"sft_loss": 0.0015413039014674722,
"step": 2165,
"total_loss": 0.0015857949202086274,
"value_loss": 0.00044491018940107097,
"value_loss_search": 0.0015283264680988396,
"value_loss_thought": 0.0020309550552781276
},
{
"epoch": 12.95,
"grad_norm": 0.1314803478632567,
"learning_rate": 2.676005653563063e-07,
"loss": 0.0122,
"sft_loss": 0.0012932573270518332,
"step": 2170,
"total_loss": 0.0013801320299876353,
"value_loss": 0.0008687470835752719,
"value_loss_search": 0.0031730859160461478,
"value_loss_thought": 0.0037768907095369285
},
{
"epoch": 12.98,
"grad_norm": 0.15101872666959495,
"learning_rate": 2.5981241650594736e-07,
"loss": 0.0116,
"sft_loss": 0.001474944083020091,
"step": 2175,
"total_loss": 0.0015574592757964466,
"value_loss": 0.0008251519188775091,
"value_loss_search": 0.0024292909915487825,
"value_loss_thought": 0.00417192430927571
},
{
"epoch": 13.01,
"grad_norm": 0.12826566707799528,
"learning_rate": 2.5213306406402263e-07,
"loss": 0.0133,
"sft_loss": 0.0017561075917910784,
"step": 2180,
"total_loss": 0.0018071690113764306,
"value_loss": 0.0005106141129658682,
"value_loss_search": 0.0019689877941118537,
"value_loss_thought": 0.0021159251197332197
},
{
"epoch": 13.04,
"grad_norm": 0.1709754478860301,
"learning_rate": 2.445628809841055e-07,
"loss": 0.0114,
"sft_loss": 0.0015144431439694018,
"step": 2185,
"total_loss": 0.001573674503174516,
"value_loss": 0.0005923136392084416,
"value_loss_search": 0.001896365256311583,
"value_loss_thought": 0.0028421438521490927
},
{
"epoch": 13.07,
"grad_norm": 0.10695694103625902,
"learning_rate": 2.3710223491787643e-07,
"loss": 0.0115,
"sft_loss": 0.0011580413149204106,
"step": 2190,
"total_loss": 0.0012181214089466152,
"value_loss": 0.0006008008974731638,
"value_loss_search": 0.0022067354780915594,
"value_loss_thought": 0.0025996716371764705
},
{
"epoch": 13.1,
"grad_norm": 0.1223481366934227,
"learning_rate": 2.2975148819726844e-07,
"loss": 0.0123,
"sft_loss": 0.0013828211929649114,
"step": 2195,
"total_loss": 0.001459511714705286,
"value_loss": 0.0007669052273740817,
"value_loss_search": 0.0029846215529801155,
"value_loss_thought": 0.0031506202247840063
},
{
"epoch": 13.13,
"grad_norm": 0.12035969601588177,
"learning_rate": 2.2251099781686853e-07,
"loss": 0.0118,
"sft_loss": 0.0010231956985080615,
"step": 2200,
"total_loss": 0.0010796000485257195,
"value_loss": 0.0005640435443638126,
"value_loss_search": 0.0022480406605694726,
"value_loss_thought": 0.0022643077172460834
},
{
"epoch": 13.16,
"grad_norm": 0.173715525744218,
"learning_rate": 2.1538111541658246e-07,
"loss": 0.0115,
"sft_loss": 0.0012041608191793785,
"step": 2205,
"total_loss": 0.0012736963247903077,
"value_loss": 0.0006953551075753239,
"value_loss_search": 0.0023029572450525395,
"value_loss_thought": 0.0032598836100532933
},
{
"epoch": 13.19,
"grad_norm": 0.12560183999239857,
"learning_rate": 2.0836218726455416e-07,
"loss": 0.0116,
"sft_loss": 0.001454232243122533,
"step": 2210,
"total_loss": 0.0015192492540819559,
"value_loss": 0.0006501699334421574,
"value_loss_search": 0.0016968745266581209,
"value_loss_thought": 0.0035044849046244053
},
{
"epoch": 13.22,
"grad_norm": 0.15088975343987443,
"learning_rate": 2.0145455424035065e-07,
"loss": 0.0123,
"sft_loss": 0.0014402579807210713,
"step": 2215,
"total_loss": 0.001486383965337268,
"value_loss": 0.00046125984255240837,
"value_loss_search": 0.0020269616570260498,
"value_loss_thought": 0.001663117084353871
},
{
"epoch": 13.25,
"grad_norm": 0.1168644695587691,
"learning_rate": 1.9465855181840742e-07,
"loss": 0.0123,
"sft_loss": 0.0016895142354769633,
"step": 2220,
"total_loss": 0.0017325070250297614,
"value_loss": 0.0004299280713553344,
"value_loss_search": 0.0017563865643637655,
"value_loss_thought": 0.0016830380049214
},
{
"epoch": 13.28,
"grad_norm": 0.12969058359006697,
"learning_rate": 1.8797451005173384e-07,
"loss": 0.0126,
"sft_loss": 0.0016237141971942037,
"step": 2225,
"total_loss": 0.0016805375176829785,
"value_loss": 0.000568233198055168,
"value_loss_search": 0.0019997224272742644,
"value_loss_thought": 0.002546143160088832
},
{
"epoch": 13.31,
"grad_norm": 0.1464573687686116,
"learning_rate": 1.8140275355588682e-07,
"loss": 0.0119,
"sft_loss": 0.0013673121546162292,
"step": 2230,
"total_loss": 0.0014293207376852024,
"value_loss": 0.0006200857808948967,
"value_loss_search": 0.0028769256222062724,
"value_loss_thought": 0.0020837606152554144
},
{
"epoch": 13.34,
"grad_norm": 0.13416087510039532,
"learning_rate": 1.749436014932021e-07,
"loss": 0.0131,
"sft_loss": 0.001349821488838643,
"step": 2235,
"total_loss": 0.001453561357747901,
"value_loss": 0.0010373986635158871,
"value_loss_search": 0.003940903465206702,
"value_loss_thought": 0.004358286027468239
},
{
"epoch": 13.37,
"grad_norm": 0.12129590354158527,
"learning_rate": 1.68597367557298e-07,
"loss": 0.0122,
"sft_loss": 0.001293862346210517,
"step": 2240,
"total_loss": 0.0013660816371590557,
"value_loss": 0.000722192872768801,
"value_loss_search": 0.002515532513518792,
"value_loss_thought": 0.0032620103815133917
},
{
"epoch": 13.4,
"grad_norm": 0.13830951441212483,
"learning_rate": 1.6236435995783644e-07,
"loss": 0.0122,
"sft_loss": 0.0014756130083696916,
"step": 2245,
"total_loss": 0.0015344153451877674,
"value_loss": 0.000588023400473503,
"value_loss_search": 0.0021232158583757155,
"value_loss_thought": 0.0025809713747094063
},
{
"epoch": 13.43,
"grad_norm": 0.14177549511437526,
"learning_rate": 1.5624488140555673e-07,
"loss": 0.0122,
"sft_loss": 0.001884979850728996,
"step": 2250,
"total_loss": 0.0019392464295002298,
"value_loss": 0.0005426657894744835,
"value_loss_search": 0.002217422648698175,
"value_loss_thought": 0.0021239036757833675
},
{
"epoch": 13.46,
"grad_norm": 0.1357612842710405,
"learning_rate": 1.5023922909757543e-07,
"loss": 0.0116,
"sft_loss": 0.001019277679733932,
"step": 2255,
"total_loss": 0.0010746703279515657,
"value_loss": 0.000553926424036888,
"value_loss_search": 0.0021177719290335515,
"value_loss_thought": 0.002313639441763371
},
{
"epoch": 13.49,
"grad_norm": 0.1299080051431472,
"learning_rate": 1.44347694702949e-07,
"loss": 0.0122,
"sft_loss": 0.0013602351624285801,
"step": 2260,
"total_loss": 0.001403163997850676,
"value_loss": 0.0004292882472554993,
"value_loss_search": 0.0016838242217204424,
"value_loss_thought": 0.0017504817547660423
},
{
"epoch": 13.52,
"grad_norm": 0.1286701282026923,
"learning_rate": 1.3857056434851301e-07,
"loss": 0.0116,
"sft_loss": 0.0010856040549697354,
"step": 2265,
"total_loss": 0.001160654007901485,
"value_loss": 0.0007504995341719223,
"value_loss_search": 0.002396246173121419,
"value_loss_thought": 0.003607750001219756
},
{
"epoch": 13.55,
"grad_norm": 0.14396728510258697,
"learning_rate": 1.3290811860498242e-07,
"loss": 0.0108,
"sft_loss": 0.0011329900531563907,
"step": 2270,
"total_loss": 0.001192187089957031,
"value_loss": 0.0005919702982509989,
"value_loss_search": 0.001987281997116952,
"value_loss_thought": 0.0027484804012374298
},
{
"epoch": 13.58,
"grad_norm": 0.11133502723535868,
"learning_rate": 1.273606324733284e-07,
"loss": 0.0119,
"sft_loss": 0.0012918035121401773,
"step": 2275,
"total_loss": 0.0013533333825648697,
"value_loss": 0.0006152988007769977,
"value_loss_search": 0.0025145169366169286,
"value_loss_thought": 0.002407873464107979
},
{
"epoch": 13.61,
"grad_norm": 0.12100244913687518,
"learning_rate": 1.2192837537142065e-07,
"loss": 0.0115,
"sft_loss": 0.0015881910978350789,
"step": 2280,
"total_loss": 0.0016699408407248484,
"value_loss": 0.0008174974953490732,
"value_loss_search": 0.0026218705085824467,
"value_loss_thought": 0.003918109554024341
},
{
"epoch": 13.64,
"grad_norm": 0.1389992640143122,
"learning_rate": 1.1661161112094421e-07,
"loss": 0.0116,
"sft_loss": 0.0014317982335342095,
"step": 2285,
"total_loss": 0.0014963600385272003,
"value_loss": 0.0006456180733948713,
"value_loss_search": 0.0020672334404252977,
"value_loss_thought": 0.0030977111006450287
},
{
"epoch": 13.67,
"grad_norm": 0.1376806904958747,
"learning_rate": 1.1141059793458586e-07,
"loss": 0.0135,
"sft_loss": 0.001531593399704434,
"step": 2290,
"total_loss": 0.0015879040782849074,
"value_loss": 0.0005631069248011045,
"value_loss_search": 0.002264327982754821,
"value_loss_thought": 0.002240527437061246
},
{
"epoch": 13.7,
"grad_norm": 0.15507672061398864,
"learning_rate": 1.0632558840349333e-07,
"loss": 0.0127,
"sft_loss": 0.001709194021532312,
"step": 2295,
"total_loss": 0.0017596675465483714,
"value_loss": 0.0005047351388384414,
"value_loss_search": 0.0021137851630555816,
"value_loss_thought": 0.0019240959423768799
},
{
"epoch": 13.73,
"grad_norm": 0.13950654690195688,
"learning_rate": 1.0135682948501146e-07,
"loss": 0.011,
"sft_loss": 0.0016408312105340884,
"step": 2300,
"total_loss": 0.0016937724493836016,
"value_loss": 0.0005294122824125225,
"value_loss_search": 0.002247820197192141,
"value_loss_thought": 0.0019874780593738704
},
{
"epoch": 13.76,
"grad_norm": 0.14603248892583517,
"learning_rate": 9.650456249068268e-08,
"loss": 0.0125,
"sft_loss": 0.001476616770378314,
"step": 2305,
"total_loss": 0.001532993128402893,
"value_loss": 0.0005637636299979931,
"value_loss_search": 0.0019370973135977465,
"value_loss_thought": 0.0025730117005196007
},
{
"epoch": 13.79,
"grad_norm": 0.12093820892491877,
"learning_rate": 9.176902307453328e-08,
"loss": 0.0123,
"sft_loss": 0.0016192490846151486,
"step": 2310,
"total_loss": 0.0016830127960190567,
"value_loss": 0.0006376370715429402,
"value_loss_search": 0.0025056022790977293,
"value_loss_thought": 0.0025954942909493183
},
{
"epoch": 13.82,
"grad_norm": 0.13585785284804588,
"learning_rate": 8.715044122162508e-08,
"loss": 0.0127,
"sft_loss": 0.0017002749460516497,
"step": 2315,
"total_loss": 0.0017550808576103805,
"value_loss": 0.0005480592571984744,
"value_loss_search": 0.0021303763422451994,
"value_loss_thought": 0.002254097702507352
},
{
"epoch": 13.85,
"grad_norm": 0.14486121709588692,
"learning_rate": 8.264904123688745e-08,
"loss": 0.0119,
"sft_loss": 0.001309369836235419,
"step": 2320,
"total_loss": 0.0013705807045170104,
"value_loss": 0.0006121086411894794,
"value_loss_search": 0.0019892391615371706,
"value_loss_thought": 0.002907629985912763
},
{
"epoch": 13.88,
"grad_norm": 0.13756062164141314,
"learning_rate": 7.826504173422372e-08,
"loss": 0.0131,
"sft_loss": 0.0015762085182359441,
"step": 2325,
"total_loss": 0.0016776628372952018,
"value_loss": 0.0010145431685145922,
"value_loss_search": 0.0035052792621627304,
"value_loss_thought": 0.004611065951957016
},
{
"epoch": 13.91,
"grad_norm": 0.12390462036780822,
"learning_rate": 7.399865562589315e-08,
"loss": 0.0131,
"sft_loss": 0.0017432003776775673,
"step": 2330,
"total_loss": 0.0018063452240141941,
"value_loss": 0.0006314483902201574,
"value_loss_search": 0.001986952345464488,
"value_loss_thought": 0.003064634781657105
},
{
"epoch": 13.94,
"grad_norm": 0.1357904093704358,
"learning_rate": 6.985009011217209e-08,
"loss": 0.0129,
"sft_loss": 0.001526657902286388,
"step": 2335,
"total_loss": 0.0015822424648092692,
"value_loss": 0.0005558455008667807,
"value_loss_search": 0.0019112108780859672,
"value_loss_thought": 0.0025355531076371564
},
{
"epoch": 13.96,
"grad_norm": 0.12173848490891062,
"learning_rate": 6.581954667128965e-08,
"loss": 0.0125,
"sft_loss": 0.0019005106441909447,
"step": 2340,
"total_loss": 0.0019535693316782956,
"value_loss": 0.0005305868885898235,
"value_loss_search": 0.0019408738701571337,
"value_loss_thought": 0.0023038212573055715
},
{
"epoch": 13.99,
"grad_norm": 0.12311131791939463,
"learning_rate": 6.190722104964436e-08,
"loss": 0.0132,
"sft_loss": 0.0012315514002693817,
"step": 2345,
"total_loss": 0.0012908000820957,
"value_loss": 0.0005924867479691898,
"value_loss_search": 0.002214185446916872,
"value_loss_thought": 0.0025257085178168382
},
{
"epoch": 14.02,
"grad_norm": 0.11289506914779934,
"learning_rate": 5.811330325229569e-08,
"loss": 0.012,
"sft_loss": 0.0014274256362114101,
"step": 2350,
"total_loss": 0.00148404497326311,
"value_loss": 0.0005661933894430149,
"value_loss_search": 0.0018998756612177204,
"value_loss_thought": 0.0026296714639101994
},
{
"epoch": 14.05,
"grad_norm": 0.14032918438561062,
"learning_rate": 5.443797753373864e-08,
"loss": 0.012,
"sft_loss": 0.001353855719207786,
"step": 2355,
"total_loss": 0.0014032512601801273,
"value_loss": 0.0004939554979955574,
"value_loss_search": 0.0016622686555592737,
"value_loss_thought": 0.0022893753313837804
},
{
"epoch": 14.08,
"grad_norm": 0.1302537325621518,
"learning_rate": 5.0881422388952275e-08,
"loss": 0.012,
"sft_loss": 0.0009611410961952061,
"step": 2360,
"total_loss": 0.001024993611773084,
"value_loss": 0.0006385252608311021,
"value_loss_search": 0.0032244599921341433,
"value_loss_thought": 0.0018837421045418523
},
{
"epoch": 14.11,
"grad_norm": 0.12164242099090507,
"learning_rate": 4.7443810544734456e-08,
"loss": 0.0116,
"sft_loss": 0.0013725335855269804,
"step": 2365,
"total_loss": 0.0014293210130739808,
"value_loss": 0.0005678743107637274,
"value_loss_search": 0.002384104471229875,
"value_loss_thought": 0.00215889002156473
},
{
"epoch": 14.14,
"grad_norm": 0.163492619946634,
"learning_rate": 4.412530895131051e-08,
"loss": 0.0115,
"sft_loss": 0.0012715687771560624,
"step": 2370,
"total_loss": 0.001382276511756686,
"value_loss": 0.0011070772634695913,
"value_loss_search": 0.0019688921961233063,
"value_loss_thought": 0.006887725907620279
},
{
"epoch": 14.17,
"grad_norm": 0.1481763234968163,
"learning_rate": 4.092607877422578e-08,
"loss": 0.0111,
"sft_loss": 0.0013375403970712796,
"step": 2375,
"total_loss": 0.0014074473611998429,
"value_loss": 0.0006990696618686343,
"value_loss_search": 0.0029466108325550524,
"value_loss_thought": 0.0026459465245352474
},
{
"epoch": 14.2,
"grad_norm": 0.1186453351073309,
"learning_rate": 3.784627538652025e-08,
"loss": 0.0112,
"sft_loss": 0.0014332133869174868,
"step": 2380,
"total_loss": 0.0014889124539600117,
"value_loss": 0.0005569907084790771,
"value_loss_search": 0.002353395393970459,
"value_loss_thought": 0.002102530315414697
},
{
"epoch": 14.23,
"grad_norm": 0.13825618980337176,
"learning_rate": 3.488604836117987e-08,
"loss": 0.0113,
"sft_loss": 0.0014167566667310893,
"step": 2385,
"total_loss": 0.0014698782767844421,
"value_loss": 0.0005312162420523237,
"value_loss_search": 0.0019338703364951471,
"value_loss_thought": 0.002315859655487884
},
{
"epoch": 14.26,
"grad_norm": 0.13774279281432664,
"learning_rate": 3.204554146387456e-08,
"loss": 0.0121,
"sft_loss": 0.0012910763907711953,
"step": 2390,
"total_loss": 0.0013450498239109265,
"value_loss": 0.0005397343376785102,
"value_loss_search": 0.0018899123013738973,
"value_loss_thought": 0.002427962389350569
},
{
"epoch": 14.29,
"grad_norm": 0.13075550190644245,
"learning_rate": 2.9324892645975766e-08,
"loss": 0.0125,
"sft_loss": 0.0016997230239212513,
"step": 2395,
"total_loss": 0.0017409329679125563,
"value_loss": 0.0004120994811614764,
"value_loss_search": 0.0016292081562824024,
"value_loss_thought": 0.0016675876831413917
},
{
"epoch": 14.32,
"grad_norm": 0.12955374374537487,
"learning_rate": 2.67242340378554e-08,
"loss": 0.013,
"sft_loss": 0.0015193151630228385,
"step": 2400,
"total_loss": 0.0015746116821185653,
"value_loss": 0.0005529651271899638,
"value_loss_search": 0.0023058261356823095,
"value_loss_thought": 0.0021178948942633725
},
{
"epoch": 14.35,
"grad_norm": 0.13785257512036658,
"learning_rate": 2.4243691942471004e-08,
"loss": 0.0129,
"sft_loss": 0.0013197832508012653,
"step": 2405,
"total_loss": 0.001368108755303865,
"value_loss": 0.00048325501416002226,
"value_loss_search": 0.002034584193029332,
"value_loss_thought": 0.0018314558808242508
},
{
"epoch": 14.38,
"grad_norm": 0.1248830545164438,
"learning_rate": 2.1883386829229802e-08,
"loss": 0.0117,
"sft_loss": 0.0015428359998622908,
"step": 2410,
"total_loss": 0.0015903647321067638,
"value_loss": 0.0004752873185424278,
"value_loss_search": 0.0019664257517064245,
"value_loss_thought": 0.0018358727832037403
},
{
"epoch": 14.41,
"grad_norm": 0.1284155399497688,
"learning_rate": 1.9643433328139507e-08,
"loss": 0.0129,
"sft_loss": 0.0015311524126445874,
"step": 2415,
"total_loss": 0.0015765634343665625,
"value_loss": 0.0004541101951872406,
"value_loss_search": 0.0018012137584236144,
"value_loss_thought": 0.0018316678463634161
},
{
"epoch": 14.44,
"grad_norm": 0.13722313083098495,
"learning_rate": 1.7523940224239422e-08,
"loss": 0.0129,
"sft_loss": 0.0018140839121770114,
"step": 2420,
"total_loss": 0.001872726490603327,
"value_loss": 0.0005864257679718321,
"value_loss_search": 0.0018046398709088863,
"value_loss_thought": 0.002886766302572141
},
{
"epoch": 14.47,
"grad_norm": 0.13659337122897194,
"learning_rate": 1.5525010452319966e-08,
"loss": 0.0118,
"sft_loss": 0.001406003290321678,
"step": 2425,
"total_loss": 0.0014668999268792505,
"value_loss": 0.0006089662623253389,
"value_loss_search": 0.0023407790422652398,
"value_loss_thought": 0.0025309510473562114
},
{
"epoch": 14.5,
"grad_norm": 0.1630202519081285,
"learning_rate": 1.3646741091920546e-08,
"loss": 0.012,
"sft_loss": 0.0018367367767496035,
"step": 2430,
"total_loss": 0.0018952648396265205,
"value_loss": 0.0005852805467498001,
"value_loss_search": 0.0021444437501230594,
"value_loss_thought": 0.002537800629784215
},
{
"epoch": 14.53,
"grad_norm": 0.14649930933179126,
"learning_rate": 1.1889223362616664e-08,
"loss": 0.0137,
"sft_loss": 0.0014180621423292906,
"step": 2435,
"total_loss": 0.0014755390043148964,
"value_loss": 0.0005747684329776348,
"value_loss_search": 0.001847173216401643,
"value_loss_thought": 0.0027509742275924507
},
{
"epoch": 14.56,
"grad_norm": 0.12468828325929393,
"learning_rate": 1.0252542619589856e-08,
"loss": 0.0123,
"sft_loss": 0.0015723185992101208,
"step": 2440,
"total_loss": 0.0016369965853844093,
"value_loss": 0.0006467797803679787,
"value_loss_search": 0.0025067227985736905,
"value_loss_thought": 0.0026675153821770436
},
{
"epoch": 14.59,
"grad_norm": 0.14711025562960323,
"learning_rate": 8.736778349480723e-09,
"loss": 0.0119,
"sft_loss": 0.0015009303140686824,
"step": 2445,
"total_loss": 0.0015552314092317943,
"value_loss": 0.0005430109402368543,
"value_loss_search": 0.0018667381095156088,
"value_loss_thought": 0.0024773493929330925
},
{
"epoch": 14.62,
"grad_norm": 0.1173625792384152,
"learning_rate": 7.3420041665303585e-09,
"loss": 0.0118,
"sft_loss": 0.0015992191329132766,
"step": 2450,
"total_loss": 0.0016605654201441666,
"value_loss": 0.0006134628767767936,
"value_loss_search": 0.002318676908919315,
"value_loss_thought": 0.00258902612285965
},
{
"epoch": 14.65,
"grad_norm": 0.13451903100649834,
"learning_rate": 6.068287809004314e-09,
"loss": 0.0121,
"sft_loss": 0.0015388225147034973,
"step": 2455,
"total_loss": 0.0015945610022775724,
"value_loss": 0.0005573847084178851,
"value_loss_search": 0.0020913115042260344,
"value_loss_thought": 0.002367766158249651
},
{
"epoch": 14.68,
"grad_norm": 0.1166996348736506,
"learning_rate": 4.915691135903566e-09,
"loss": 0.0115,
"sft_loss": 0.001615592051530257,
"step": 2460,
"total_loss": 0.0016700670589443688,
"value_loss": 0.0005447498988303323,
"value_loss_search": 0.0028115076490621504,
"value_loss_thought": 0.0015464915640791333
},
{
"epoch": 14.71,
"grad_norm": 0.1505633716850507,
"learning_rate": 3.884270123959144e-09,
"loss": 0.0114,
"sft_loss": 0.0012577687215525658,
"step": 2465,
"total_loss": 0.0013172450172078242,
"value_loss": 0.0005947628893864021,
"value_loss_search": 0.002692494276323032,
"value_loss_thought": 0.0020656088718169485
},
{
"epoch": 14.74,
"grad_norm": 0.1334895803298696,
"learning_rate": 2.9740748649145778e-09,
"loss": 0.0121,
"sft_loss": 0.0015028521651402117,
"step": 2470,
"total_loss": 0.0015599074833644977,
"value_loss": 0.0005705531071839686,
"value_loss_search": 0.0018976363004298946,
"value_loss_thought": 0.0026667885652045696
},
{
"epoch": 14.77,
"grad_norm": 0.11676293297773671,
"learning_rate": 2.1851495630928475e-09,
"loss": 0.0121,
"sft_loss": 0.0011438517773058265,
"step": 2475,
"total_loss": 0.001199561754755507,
"value_loss": 0.000557099866716726,
"value_loss_search": 0.002204984583852365,
"value_loss_thought": 0.002251814359776905
},
{
"epoch": 14.8,
"grad_norm": 0.12135462746119888,
"learning_rate": 1.5175325332489331e-09,
"loss": 0.0118,
"sft_loss": 0.0013419981114566326,
"step": 2480,
"total_loss": 0.0014050020730792313,
"value_loss": 0.0006300395589619257,
"value_loss_search": 0.002390774656430494,
"value_loss_thought": 0.00264954178167045
},
{
"epoch": 14.83,
"grad_norm": 0.12877818502933935,
"learning_rate": 9.712561987104685e-10,
"loss": 0.012,
"sft_loss": 0.001536320144077763,
"step": 2485,
"total_loss": 0.0015917829690181406,
"value_loss": 0.0005546283648982353,
"value_loss_search": 0.0021470139769645515,
"value_loss_thought": 0.0022900129014942648
},
{
"epoch": 14.86,
"grad_norm": 0.12977801368527653,
"learning_rate": 5.463470898017798e-10,
"loss": 0.0128,
"sft_loss": 0.00153597031312529,
"step": 2490,
"total_loss": 0.0015952805159344051,
"value_loss": 0.0005931021074616182,
"value_loss_search": 0.002620013221735462,
"value_loss_thought": 0.002124803609603987
},
{
"epoch": 14.89,
"grad_norm": 0.10846141171279486,
"learning_rate": 2.4282584255547194e-10,
"loss": 0.0121,
"sft_loss": 0.0014916551124770194,
"step": 2495,
"total_loss": 0.00154097568412368,
"value_loss": 0.0004932056985808231,
"value_loss_search": 0.001677905346400621,
"value_loss_thought": 0.0022677402528643144
},
{
"epoch": 14.92,
"grad_norm": 0.1265986974514275,
"learning_rate": 6.070719771156252e-11,
"loss": 0.0123,
"sft_loss": 0.0017506703617982567,
"step": 2500,
"total_loss": 0.0018172925318538091,
"value_loss": 0.0006662217223492916,
"value_loss_search": 0.00269593252516529,
"value_loss_thought": 0.0026338411991446266
},
{
"epoch": 14.95,
"grad_norm": 0.12729059230038972,
"learning_rate": 0.0,
"loss": 0.0116,
"sft_loss": 0.0012781478551914915,
"step": 2505,
"total_loss": 0.0013354677166319106,
"value_loss": 0.0005731985727152278,
"value_loss_search": 0.0018533691099037243,
"value_loss_thought": 0.0027322194214775665
},
{
"epoch": 14.95,
"step": 2505,
"total_flos": 0.0,
"train_loss": 0.09394951220936404,
"train_runtime": 92418.8162,
"train_samples_per_second": 3.481,
"train_steps_per_second": 0.027
}
],
"logging_steps": 5,
"max_steps": 2505,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 350,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}